1cb512458SBarry Smith #ifndef lint 2*e0f21b7aSSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.180 1996/12/18 18:47:24 curfman Exp balay $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 7d9942c19SSatish Balay #include "src/inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14*e0f21b7aSSatish Balay #undef __FUNCTION__ 15*e0f21b7aSSatish Balay #define __FUNCTION__ "CreateColmap_MPIAIJ_Private" 160a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 179e25ed09SBarry Smith { 1844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20905e6a2fSBarry Smith int n = B->n,i; 21dbb450caSBarry Smith 220452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 23464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 24cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 25905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 269e25ed09SBarry Smith return 0; 279e25ed09SBarry Smith } 289e25ed09SBarry Smith 292493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 302493cbb0SBarry Smith 31*e0f21b7aSSatish Balay #undef __FUNCTION__ 32*e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetRowIJ_MPIAIJ" 333b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 343b2fbd54SBarry Smith PetscTruth *done) 35299609e3SLois Curfman McInnes { 36299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 37299609e3SLois Curfman McInnes int ierr; 3817699dbbSLois Curfman McInnes if (aij->size == 1) { 393b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 403b2fbd54SBarry Smith } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel"); 413b2fbd54SBarry Smith return 0; 423b2fbd54SBarry Smith } 433b2fbd54SBarry Smith 44*e0f21b7aSSatish Balay #undef __FUNCTION__ 45*e0f21b7aSSatish Balay #define __FUNCTION__ "MatRestoreRowIJ_MPIAIJ" 463b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 473b2fbd54SBarry Smith PetscTruth *done) 483b2fbd54SBarry Smith { 493b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 503b2fbd54SBarry Smith int ierr; 513b2fbd54SBarry Smith if (aij->size == 1) { 523b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 533b2fbd54SBarry Smith } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel"); 54299609e3SLois Curfman McInnes return 0; 55299609e3SLois Curfman McInnes } 56299609e3SLois Curfman McInnes 570a198c4cSBarry Smith extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 580a198c4cSBarry Smith 59*e0f21b7aSSatish Balay #undef __FUNCTION__ 60*e0f21b7aSSatish Balay #define __FUNCTION__ "MatSetValues_MPIAIJ" 614b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 628a729477SBarry Smith { 6344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 644b0e389bSBarry Smith Scalar value; 651eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 661eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 67905e6a2fSBarry Smith int roworiented = aij->roworiented; 688a729477SBarry Smith 690a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 7064119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 7148d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 728a729477SBarry Smith } 730a198c4cSBarry Smith #endif 741eb62cbbSBarry Smith aij->insertmode = addv; 758a729477SBarry Smith for ( i=0; i<m; i++ ) { 760a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 774b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 784b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 790a198c4cSBarry Smith #endif 804b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 814b0e389bSBarry Smith row = im[i] - rstart; 821eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 834b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 844b0e389bSBarry Smith col = in[j] - cstart; 854b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 860a198c4cSBarry Smith ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 871eb62cbbSBarry Smith } 880a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 890a198c4cSBarry Smith else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");} 900a198c4cSBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");} 910a198c4cSBarry Smith #endif 921eb62cbbSBarry Smith else { 93227d817aSBarry Smith if (mat->was_assembled) { 94905e6a2fSBarry Smith if (!aij->colmap) { 95905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 96905e6a2fSBarry Smith } 97905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 98ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 992493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 1004b0e389bSBarry Smith col = in[j]; 101d6dfbf8fSBarry Smith } 1029e25ed09SBarry Smith } 1034b0e389bSBarry Smith else col = in[j]; 1044b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 1050a198c4cSBarry Smith ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 1061eb62cbbSBarry Smith } 1071eb62cbbSBarry Smith } 1081eb62cbbSBarry Smith } 1091eb62cbbSBarry Smith else { 11090f02eecSBarry Smith if (roworiented && !aij->donotstash) { 1114b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 1124b0e389bSBarry Smith } 1134b0e389bSBarry Smith else { 11490f02eecSBarry Smith if (!aij->donotstash) { 1154b0e389bSBarry Smith row = im[i]; 1164b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 1174b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 1184b0e389bSBarry Smith } 1194b0e389bSBarry Smith } 1201eb62cbbSBarry Smith } 1218a729477SBarry Smith } 12290f02eecSBarry Smith } 1238a729477SBarry Smith return 0; 1248a729477SBarry Smith } 1258a729477SBarry Smith 126*e0f21b7aSSatish Balay #undef __FUNCTION__ 127*e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetValues_MPIAIJ" 128b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 129b49de8d1SLois Curfman McInnes { 130b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 132b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 133b49de8d1SLois Curfman McInnes 134b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 135b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 136b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 137b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 138b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 139b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 140b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 141b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 142b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 143b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 144b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 145b49de8d1SLois Curfman McInnes } 146b49de8d1SLois Curfman McInnes else { 147905e6a2fSBarry Smith if (!aij->colmap) { 148905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 149905e6a2fSBarry Smith } 150905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 151e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 152d9d09a02SSatish Balay else { 153b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 154b49de8d1SLois Curfman McInnes } 155b49de8d1SLois Curfman McInnes } 156b49de8d1SLois Curfman McInnes } 157d9d09a02SSatish Balay } 158b49de8d1SLois Curfman McInnes else { 159b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 160b49de8d1SLois Curfman McInnes } 161b49de8d1SLois Curfman McInnes } 162b49de8d1SLois Curfman McInnes return 0; 163b49de8d1SLois Curfman McInnes } 164b49de8d1SLois Curfman McInnes 165*e0f21b7aSSatish Balay #undef __FUNCTION__ 166*e0f21b7aSSatish Balay #define __FUNCTION__ "MatAssemblyBegin_MPIAIJ" 167fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1688a729477SBarry Smith { 16944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 170d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 17117699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 17217699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1731eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1746abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1751eb62cbbSBarry Smith InsertMode addv; 1761eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1771eb62cbbSBarry Smith 1781eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 179d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 180dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 181bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1821eb62cbbSBarry Smith } 1831eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1841eb62cbbSBarry Smith 1851eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1860452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 187cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1880452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1891eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1901eb62cbbSBarry Smith idx = aij->stash.idx[i]; 19117699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1921eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1931eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1948a729477SBarry Smith } 1958a729477SBarry Smith } 1968a729477SBarry Smith } 19717699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1981eb62cbbSBarry Smith 1991eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 2000452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 20117699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 20217699dbbSLois Curfman McInnes nreceives = work[rank]; 20317699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 20417699dbbSLois Curfman McInnes nmax = work[rank]; 2050452661fSBarry Smith PetscFree(work); 2061eb62cbbSBarry Smith 2071eb62cbbSBarry Smith /* post receives: 2081eb62cbbSBarry Smith 1) each message will consist of ordered pairs 2091eb62cbbSBarry Smith (global index,value) we store the global index as a double 210d6dfbf8fSBarry Smith to simplify the message passing. 2111eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 2121eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 2131eb62cbbSBarry Smith this is a lot of wasted space. 2141eb62cbbSBarry Smith 2151eb62cbbSBarry Smith 2161eb62cbbSBarry Smith This could be done better. 2171eb62cbbSBarry Smith */ 2180452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 21978b31e54SBarry Smith CHKPTRQ(rvalues); 2200452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 22178b31e54SBarry Smith CHKPTRQ(recv_waits); 2221eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 223416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 2241eb62cbbSBarry Smith comm,recv_waits+i); 2251eb62cbbSBarry Smith } 2261eb62cbbSBarry Smith 2271eb62cbbSBarry Smith /* do sends: 2281eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 2291eb62cbbSBarry Smith the ith processor 2301eb62cbbSBarry Smith */ 2310452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 2320452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 23378b31e54SBarry Smith CHKPTRQ(send_waits); 2340452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 2351eb62cbbSBarry Smith starts[0] = 0; 23617699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2371eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2381eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2391eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2401eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2411eb62cbbSBarry Smith } 2420452661fSBarry Smith PetscFree(owner); 2431eb62cbbSBarry Smith starts[0] = 0; 24417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2451eb62cbbSBarry Smith count = 0; 24617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2471eb62cbbSBarry Smith if (procs[i]) { 248416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2491eb62cbbSBarry Smith comm,send_waits+count++); 2501eb62cbbSBarry Smith } 2511eb62cbbSBarry Smith } 2520452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2531eb62cbbSBarry Smith 2541eb62cbbSBarry Smith /* Free cache space */ 25555908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 25678b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2571eb62cbbSBarry Smith 2581eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2591eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2601eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2611eb62cbbSBarry Smith aij->rmax = nmax; 2621eb62cbbSBarry Smith 2631eb62cbbSBarry Smith return 0; 2641eb62cbbSBarry Smith } 26544a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2661eb62cbbSBarry Smith 267*e0f21b7aSSatish Balay #undef __FUNCTION__ 268*e0f21b7aSSatish Balay #define __FUNCTION__ "MatAssemblyEnd_MPIAIJ" 269fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2701eb62cbbSBarry Smith { 27144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2721eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 273416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 274905e6a2fSBarry Smith int row,col,other_disassembled; 2751eb62cbbSBarry Smith Scalar *values,val; 2761eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2771eb62cbbSBarry Smith 2781eb62cbbSBarry Smith /* wait on receives */ 2791eb62cbbSBarry Smith while (count) { 280d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2811eb62cbbSBarry Smith /* unpack receives into our local space */ 282d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 283c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2841eb62cbbSBarry Smith n = n/3; 2851eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 286227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 287227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2881eb62cbbSBarry Smith val = values[3*i+2]; 2891eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2901eb62cbbSBarry Smith col -= aij->cstart; 2911eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2921eb62cbbSBarry Smith } 2931eb62cbbSBarry Smith else { 29455a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 295905e6a2fSBarry Smith if (!aij->colmap) { 296905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 297905e6a2fSBarry Smith } 298905e6a2fSBarry Smith col = aij->colmap[col] - 1; 299ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 3002493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 301227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 302d6dfbf8fSBarry Smith } 3039e25ed09SBarry Smith } 3041eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 3051eb62cbbSBarry Smith } 3061eb62cbbSBarry Smith } 3071eb62cbbSBarry Smith count--; 3081eb62cbbSBarry Smith } 3090452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 3101eb62cbbSBarry Smith 3111eb62cbbSBarry Smith /* wait on sends */ 3121eb62cbbSBarry Smith if (aij->nsends) { 3130a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 3141eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 3150452661fSBarry Smith PetscFree(send_status); 3161eb62cbbSBarry Smith } 3170452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 3181eb62cbbSBarry Smith 31964119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 32078b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 32178b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 3221eb62cbbSBarry Smith 3232493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 3242493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 325227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 326227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 3272493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 3282493cbb0SBarry Smith } 3292493cbb0SBarry Smith 3306d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 33178b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 3325e42470aSBarry Smith } 33378b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 33478b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 3355e42470aSBarry Smith 3367a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 3378a729477SBarry Smith return 0; 3388a729477SBarry Smith } 3398a729477SBarry Smith 340*e0f21b7aSSatish Balay #undef __FUNCTION__ 341*e0f21b7aSSatish Balay #define __FUNCTION__ "MatZeroEntries_MPIAIJ" 3422ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 3431eb62cbbSBarry Smith { 34444a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 345dbd7a890SLois Curfman McInnes int ierr; 34678b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 34778b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3481eb62cbbSBarry Smith return 0; 3491eb62cbbSBarry Smith } 3501eb62cbbSBarry Smith 3511eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3521eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3531eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3541eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3551eb62cbbSBarry Smith routine. 3561eb62cbbSBarry Smith */ 357*e0f21b7aSSatish Balay #undef __FUNCTION__ 358*e0f21b7aSSatish Balay #define __FUNCTION__ "MatZeroRows_MPIAIJ" 35944a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3601eb62cbbSBarry Smith { 36144a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 36217699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3636abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 36417699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3655392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 366d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 367d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3681eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3691eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3701eb62cbbSBarry Smith IS istmp; 3711eb62cbbSBarry Smith 37277c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 37378b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3741eb62cbbSBarry Smith 3751eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3760452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 377cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3780452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3791eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3801eb62cbbSBarry Smith idx = rows[i]; 3811eb62cbbSBarry Smith found = 0; 38217699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3831eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3841eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3851eb62cbbSBarry Smith } 3861eb62cbbSBarry Smith } 387bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3881eb62cbbSBarry Smith } 38917699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3901eb62cbbSBarry Smith 3911eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3920452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 39317699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 39417699dbbSLois Curfman McInnes nrecvs = work[rank]; 39517699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 39617699dbbSLois Curfman McInnes nmax = work[rank]; 3970452661fSBarry Smith PetscFree(work); 3981eb62cbbSBarry Smith 3991eb62cbbSBarry Smith /* post receives: */ 4000452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 40178b31e54SBarry Smith CHKPTRQ(rvalues); 4020452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 40378b31e54SBarry Smith CHKPTRQ(recv_waits); 4041eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 405416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 4061eb62cbbSBarry Smith } 4071eb62cbbSBarry Smith 4081eb62cbbSBarry Smith /* do sends: 4091eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 4101eb62cbbSBarry Smith the ith processor 4111eb62cbbSBarry Smith */ 4120452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 4130452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 41478b31e54SBarry Smith CHKPTRQ(send_waits); 4150452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 4161eb62cbbSBarry Smith starts[0] = 0; 41717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4181eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4191eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 4201eb62cbbSBarry Smith } 4211eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 4221eb62cbbSBarry Smith 4231eb62cbbSBarry Smith starts[0] = 0; 42417699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4251eb62cbbSBarry Smith count = 0; 42617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 4271eb62cbbSBarry Smith if (procs[i]) { 428416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 4291eb62cbbSBarry Smith } 4301eb62cbbSBarry Smith } 4310452661fSBarry Smith PetscFree(starts); 4321eb62cbbSBarry Smith 43317699dbbSLois Curfman McInnes base = owners[rank]; 4341eb62cbbSBarry Smith 4351eb62cbbSBarry Smith /* wait on receives */ 4360452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 4371eb62cbbSBarry Smith source = lens + nrecvs; 4381eb62cbbSBarry Smith count = nrecvs; slen = 0; 4391eb62cbbSBarry Smith while (count) { 440d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 4411eb62cbbSBarry Smith /* unpack receives into our local space */ 4421eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 443d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 444d6dfbf8fSBarry Smith lens[imdex] = n; 4451eb62cbbSBarry Smith slen += n; 4461eb62cbbSBarry Smith count--; 4471eb62cbbSBarry Smith } 4480452661fSBarry Smith PetscFree(recv_waits); 4491eb62cbbSBarry Smith 4501eb62cbbSBarry Smith /* move the data into the send scatter */ 4510452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4521eb62cbbSBarry Smith count = 0; 4531eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4541eb62cbbSBarry Smith values = rvalues + i*nmax; 4551eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4561eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4571eb62cbbSBarry Smith } 4581eb62cbbSBarry Smith } 4590452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4600452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4611eb62cbbSBarry Smith 4621eb62cbbSBarry Smith /* actually zap the local rows */ 463537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 464464493b3SBarry Smith PLogObjectParent(A,istmp); 4650452661fSBarry Smith PetscFree(lrows); 46678b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 46778b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 46878b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4691eb62cbbSBarry Smith 4701eb62cbbSBarry Smith /* wait on sends */ 4711eb62cbbSBarry Smith if (nsends) { 4720452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 47378b31e54SBarry Smith CHKPTRQ(send_status); 4741eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4750452661fSBarry Smith PetscFree(send_status); 4761eb62cbbSBarry Smith } 4770452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4781eb62cbbSBarry Smith 4791eb62cbbSBarry Smith return 0; 4801eb62cbbSBarry Smith } 4811eb62cbbSBarry Smith 482*e0f21b7aSSatish Balay #undef __FUNCTION__ 483*e0f21b7aSSatish Balay #define __FUNCTION__ "MatMult_MPIAIJ" 484416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4851eb62cbbSBarry Smith { 486416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 487fbd6ef76SBarry Smith int ierr,nt; 488416022c9SBarry Smith 489a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 490fbd6ef76SBarry Smith if (nt != a->n) { 49147b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 492fbd6ef76SBarry Smith } 49343a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 49438597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 49543a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 49638597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4971eb62cbbSBarry Smith return 0; 4981eb62cbbSBarry Smith } 4991eb62cbbSBarry Smith 500*e0f21b7aSSatish Balay #undef __FUNCTION__ 501*e0f21b7aSSatish Balay #define __FUNCTION__ "MatMultAdd_MPIAIJ" 502416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 503da3a660dSBarry Smith { 504416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 505da3a660dSBarry Smith int ierr; 50643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 50738597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 50843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 50938597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 510da3a660dSBarry Smith return 0; 511da3a660dSBarry Smith } 512da3a660dSBarry Smith 513*e0f21b7aSSatish Balay #undef __FUNCTION__ 514*e0f21b7aSSatish Balay #define __FUNCTION__ "MatMultTrans_MPIAIJ" 515416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 516da3a660dSBarry Smith { 517416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 518da3a660dSBarry Smith int ierr; 519da3a660dSBarry Smith 520da3a660dSBarry Smith /* do nondiagonal part */ 52138597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 522da3a660dSBarry Smith /* send it on its way */ 523537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 524da3a660dSBarry Smith /* do local part */ 52538597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 526da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 527da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 528da3a660dSBarry Smith /* but is not perhaps always true. */ 529537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 530da3a660dSBarry Smith return 0; 531da3a660dSBarry Smith } 532da3a660dSBarry Smith 533*e0f21b7aSSatish Balay #undef __FUNCTION__ 534*e0f21b7aSSatish Balay #define __FUNCTION__ "MatMultTransAdd_MPIAIJ" 535416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 536da3a660dSBarry Smith { 537416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 538da3a660dSBarry Smith int ierr; 539da3a660dSBarry Smith 540da3a660dSBarry Smith /* do nondiagonal part */ 54138597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 542da3a660dSBarry Smith /* send it on its way */ 543537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 544da3a660dSBarry Smith /* do local part */ 54538597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 546da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 547da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 548da3a660dSBarry Smith /* but is not perhaps always true. */ 5490a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 550da3a660dSBarry Smith return 0; 551da3a660dSBarry Smith } 552da3a660dSBarry Smith 5531eb62cbbSBarry Smith /* 5541eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5551eb62cbbSBarry Smith diagonal block 5561eb62cbbSBarry Smith */ 557*e0f21b7aSSatish Balay #undef __FUNCTION__ 558*e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetDiagonal_MPIAIJ" 559416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5601eb62cbbSBarry Smith { 561416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 56244cd7ae7SLois Curfman McInnes if (a->M != a->N) 56344cd7ae7SLois Curfman McInnes SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 5645baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 5655baf8537SBarry Smith SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition"); } 566416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5671eb62cbbSBarry Smith } 5681eb62cbbSBarry Smith 569*e0f21b7aSSatish Balay #undef __FUNCTION__ 570*e0f21b7aSSatish Balay #define __FUNCTION__ "MatScale_MPIAIJ" 571052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 572052efed2SBarry Smith { 573052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 574052efed2SBarry Smith int ierr; 575052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 576052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 577052efed2SBarry Smith return 0; 578052efed2SBarry Smith } 579052efed2SBarry Smith 580*e0f21b7aSSatish Balay #undef __FUNCTION__ 581*e0f21b7aSSatish Balay #define __FUNCTION__ "MatDestroy_MPIAIJ" 58244a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5831eb62cbbSBarry Smith { 5841eb62cbbSBarry Smith Mat mat = (Mat) obj; 58544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5861eb62cbbSBarry Smith int ierr; 587a5a9c739SBarry Smith #if defined(PETSC_LOG) 5886e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 589a5a9c739SBarry Smith #endif 5900452661fSBarry Smith PetscFree(aij->rowners); 59178b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 59278b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5930452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5940452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5951eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 596a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5977a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 5980452661fSBarry Smith PetscFree(aij); 59990f02eecSBarry Smith if (mat->mapping) { 60090f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 60190f02eecSBarry Smith } 602a5a9c739SBarry Smith PLogObjectDestroy(mat); 6030452661fSBarry Smith PetscHeaderDestroy(mat); 6041eb62cbbSBarry Smith return 0; 6051eb62cbbSBarry Smith } 6067857610eSBarry Smith #include "draw.h" 607b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 608ee50ffe9SBarry Smith 609*e0f21b7aSSatish Balay #undef __FUNCTION__ 610*e0f21b7aSSatish Balay #define __FUNCTION__ "MatView_MPIAIJ_Binary" 611416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 6121eb62cbbSBarry Smith { 613416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 614416022c9SBarry Smith int ierr; 615416022c9SBarry Smith 61617699dbbSLois Curfman McInnes if (aij->size == 1) { 617416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 618416022c9SBarry Smith } 619a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 620416022c9SBarry Smith return 0; 621416022c9SBarry Smith } 622416022c9SBarry Smith 623*e0f21b7aSSatish Balay #undef __FUNCTION__ 624*e0f21b7aSSatish Balay #define __FUNCTION__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 625d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 626416022c9SBarry Smith { 62744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 628dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 629a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 630d636dbe3SBarry Smith FILE *fd; 63119bcc07fSBarry Smith ViewerType vtype; 632416022c9SBarry Smith 63319bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 63419bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 63590ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 6360a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 6374e220ebcSLois Curfman McInnes MatInfo info; 6384e220ebcSLois Curfman McInnes int flg; 639a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 64090ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 6414e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 64295e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 64377c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 64495e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 6454e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 64695e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 6474e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 6484e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 6494e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 6504e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 6514e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 652a56f8943SBarry Smith fflush(fd); 65377c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 654a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 655416022c9SBarry Smith return 0; 656416022c9SBarry Smith } 6570a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 65808480c60SBarry Smith return 0; 65908480c60SBarry Smith } 660416022c9SBarry Smith } 661416022c9SBarry Smith 66219bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 66319bcc07fSBarry Smith Draw draw; 66419bcc07fSBarry Smith PetscTruth isnull; 66519bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 66619bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 66719bcc07fSBarry Smith } 66819bcc07fSBarry Smith 66919bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 67090ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 67177c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 672d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 67317699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 6741eb62cbbSBarry Smith aij->cend); 67578b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 67678b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 677d13ab20cSBarry Smith fflush(fd); 67877c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 679d13ab20cSBarry Smith } 680416022c9SBarry Smith else { 681a56f8943SBarry Smith int size = aij->size; 682a56f8943SBarry Smith rank = aij->rank; 68317699dbbSLois Curfman McInnes if (size == 1) { 68478b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 68595373324SBarry Smith } 68695373324SBarry Smith else { 68795373324SBarry Smith /* assemble the entire matrix onto first processor. */ 68895373324SBarry Smith Mat A; 689ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6902eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 69195373324SBarry Smith Scalar *a; 6922ee70a88SLois Curfman McInnes 69317699dbbSLois Curfman McInnes if (!rank) { 694b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 695c451ab25SLois Curfman McInnes CHKERRQ(ierr); 69695373324SBarry Smith } 69795373324SBarry Smith else { 698b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 699c451ab25SLois Curfman McInnes CHKERRQ(ierr); 70095373324SBarry Smith } 701464493b3SBarry Smith PLogObjectParent(mat,A); 702416022c9SBarry Smith 70395373324SBarry Smith /* copy over the A part */ 704ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 7052ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 70695373324SBarry Smith row = aij->rstart; 707dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 70895373324SBarry Smith for ( i=0; i<m; i++ ) { 709416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 71095373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 71195373324SBarry Smith } 7122ee70a88SLois Curfman McInnes aj = Aloc->j; 713dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 71495373324SBarry Smith 71595373324SBarry Smith /* copy over the B part */ 716ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 7172ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 71895373324SBarry Smith row = aij->rstart; 7190452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 720dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 72195373324SBarry Smith for ( i=0; i<m; i++ ) { 722416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 72395373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 72495373324SBarry Smith } 7250452661fSBarry Smith PetscFree(ct); 7266d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7276d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 72817699dbbSLois Curfman McInnes if (!rank) { 72978b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 73095373324SBarry Smith } 73178b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 73295373324SBarry Smith } 73395373324SBarry Smith } 7341eb62cbbSBarry Smith return 0; 7351eb62cbbSBarry Smith } 7361eb62cbbSBarry Smith 737*e0f21b7aSSatish Balay #undef __FUNCTION__ 738*e0f21b7aSSatish Balay #define __FUNCTION__ "MatView_MPIAIJ" 739416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 740416022c9SBarry Smith { 741416022c9SBarry Smith Mat mat = (Mat) obj; 742416022c9SBarry Smith int ierr; 74319bcc07fSBarry Smith ViewerType vtype; 744416022c9SBarry Smith 74519bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 74619bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 74719bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 748d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 749416022c9SBarry Smith } 75019bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 751416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 752416022c9SBarry Smith } 753416022c9SBarry Smith return 0; 754416022c9SBarry Smith } 755416022c9SBarry Smith 7561eb62cbbSBarry Smith /* 7571eb62cbbSBarry Smith This has to provide several versions. 7581eb62cbbSBarry Smith 7591eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 7601eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 761d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 7621eb62cbbSBarry Smith */ 763*e0f21b7aSSatish Balay #undef __FUNCTION__ 764*e0f21b7aSSatish Balay #define __FUNCTION__ "MatRelax_MPIAIJ" 765fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 766dbb450caSBarry Smith double fshift,int its,Vec xx) 7678a729477SBarry Smith { 76844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 769d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 770ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 771c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 7726abc6512SBarry Smith int ierr,*idx, *diag; 773416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 7748a729477SBarry Smith 775d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 776dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 777dbb450caSBarry Smith ls = ls + shift; 778ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 779d6dfbf8fSBarry Smith diag = A->diag; 780c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 781da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 78238597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 783da3a660dSBarry Smith } 78443a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 78578b31e54SBarry Smith CHKERRQ(ierr); 78643a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 78778b31e54SBarry Smith CHKERRQ(ierr); 788d6dfbf8fSBarry Smith while (its--) { 789d6dfbf8fSBarry Smith /* go down through the rows */ 790d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 791d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 792dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 793dbb450caSBarry Smith v = A->a + A->i[i] + shift; 794d6dfbf8fSBarry Smith sum = b[i]; 795d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 796dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 797d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 798dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 799dbb450caSBarry Smith v = B->a + B->i[i] + shift; 800d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 80155a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 802d6dfbf8fSBarry Smith } 803d6dfbf8fSBarry Smith /* come up through the rows */ 804d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 805d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 806dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 807dbb450caSBarry Smith v = A->a + A->i[i] + shift; 808d6dfbf8fSBarry Smith sum = b[i]; 809d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 810dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 811d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 812dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 813dbb450caSBarry Smith v = B->a + B->i[i] + shift; 814d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 81555a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 816d6dfbf8fSBarry Smith } 817d6dfbf8fSBarry Smith } 818d6dfbf8fSBarry Smith } 819d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 820da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 82138597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 822da3a660dSBarry Smith } 82343a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 82478b31e54SBarry Smith CHKERRQ(ierr); 82543a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 82678b31e54SBarry Smith CHKERRQ(ierr); 827d6dfbf8fSBarry Smith while (its--) { 828d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 829d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 830dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 831dbb450caSBarry Smith v = A->a + A->i[i] + shift; 832d6dfbf8fSBarry Smith sum = b[i]; 833d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 834dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 835d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 836dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 837dbb450caSBarry Smith v = B->a + B->i[i] + shift; 838d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 83955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 840d6dfbf8fSBarry Smith } 841d6dfbf8fSBarry Smith } 842d6dfbf8fSBarry Smith } 843d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 844da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 84538597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 846da3a660dSBarry Smith } 84743a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 84878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 84943a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 85078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 851d6dfbf8fSBarry Smith while (its--) { 852d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 853d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 854dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 855dbb450caSBarry Smith v = A->a + A->i[i] + shift; 856d6dfbf8fSBarry Smith sum = b[i]; 857d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 858dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 859d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 860dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 861dbb450caSBarry Smith v = B->a + B->i[i] + shift; 862d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 86355a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 864d6dfbf8fSBarry Smith } 865d6dfbf8fSBarry Smith } 866d6dfbf8fSBarry Smith } 867c16cb8f2SBarry Smith else { 868c16cb8f2SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 869c16cb8f2SBarry Smith } 8708a729477SBarry Smith return 0; 8718a729477SBarry Smith } 872a66be287SLois Curfman McInnes 873*e0f21b7aSSatish Balay #undef __FUNCTION__ 874*e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetInfo_MPIAIJ" 8754e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 876a66be287SLois Curfman McInnes { 877a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 878a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 8794e220ebcSLois Curfman McInnes int ierr; 8804e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 881a66be287SLois Curfman McInnes 8824e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 8834e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 8844e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 8854e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 8864e220ebcSLois Curfman McInnes info->block_size = 1.0; 8874e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 8884e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 8894e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 8904e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 8914e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 8924e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 893a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 8944e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 8954e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 8964e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 8974e220ebcSLois Curfman McInnes info->memory = isend[3]; 8984e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 899a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 9004e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 9014e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9024e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9034e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9044e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9054e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 906a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 9074e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 9084e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9094e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9104e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9114e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9124e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 913a66be287SLois Curfman McInnes } 9144e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 9154e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 9164e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 9174e220ebcSLois Curfman McInnes 918a66be287SLois Curfman McInnes return 0; 919a66be287SLois Curfman McInnes } 920a66be287SLois Curfman McInnes 921299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 922299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 923299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 924299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 925299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 926299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 927299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 928299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 929299609e3SLois Curfman McInnes 930*e0f21b7aSSatish Balay #undef __FUNCTION__ 931*e0f21b7aSSatish Balay #define __FUNCTION__ "MatSetOption_MPIAIJ" 932416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 933c74985f6SBarry Smith { 934c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 935c74985f6SBarry Smith 9366d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 9376d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 9386da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 939b1fbbac0SLois Curfman McInnes op == MAT_COLUMNS_SORTED) { 940b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 941b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 942b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 943aeafbbfcSLois Curfman McInnes a->roworiented = 1; 944c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 945c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 946b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 9476da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 9486d4a8577SBarry Smith op == MAT_SYMMETRIC || 9496d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 9506d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 95194a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 9526d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 9534b0e389bSBarry Smith a->roworiented = 0; 9544b0e389bSBarry Smith MatSetOption(a->A,op); 9554b0e389bSBarry Smith MatSetOption(a->B,op); 95690f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 95790f02eecSBarry Smith a->donotstash = 1; 95890f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 9596d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 960c0bbcb79SLois Curfman McInnes else 9614d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 962c74985f6SBarry Smith return 0; 963c74985f6SBarry Smith } 964c74985f6SBarry Smith 965*e0f21b7aSSatish Balay #undef __FUNCTION__ 966*e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetSize_MPIAIJ" 967d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 968c74985f6SBarry Smith { 96944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 970c74985f6SBarry Smith *m = mat->M; *n = mat->N; 971c74985f6SBarry Smith return 0; 972c74985f6SBarry Smith } 973c74985f6SBarry Smith 974*e0f21b7aSSatish Balay #undef __FUNCTION__ 975*e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetLocalSize_MPIAIJ" 976d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 977c74985f6SBarry Smith { 97844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 979b7c46309SBarry Smith *m = mat->m; *n = mat->N; 980c74985f6SBarry Smith return 0; 981c74985f6SBarry Smith } 982c74985f6SBarry Smith 983*e0f21b7aSSatish Balay #undef __FUNCTION__ 984*e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetOwnershipRange_MPIAIJ" 985d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 986c74985f6SBarry Smith { 98744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 988c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 989c74985f6SBarry Smith return 0; 990c74985f6SBarry Smith } 991c74985f6SBarry Smith 9926d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9936d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9946d84be18SBarry Smith 995*e0f21b7aSSatish Balay #undef __FUNCTION__ 996*e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetRow_MPIAIJ" 9976d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 99839e00950SLois Curfman McInnes { 999154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 100070f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1001154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1002154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 100370f0671dSBarry Smith int *cmap, *idx_p; 100439e00950SLois Curfman McInnes 10057a0afa10SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 10067a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 10077a0afa10SBarry Smith 100870f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 10097a0afa10SBarry Smith /* 10107a0afa10SBarry Smith allocate enough space to hold information from the longest row. 10117a0afa10SBarry Smith */ 10127a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1013c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1014c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 10157a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 10167a0afa10SBarry Smith if (max < tmp) { max = tmp; } 10177a0afa10SBarry Smith } 10187a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 10197a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 10207a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 10217a0afa10SBarry Smith } 10227a0afa10SBarry Smith 1023b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1024abc0e9e4SLois Curfman McInnes lrow = row - rstart; 102539e00950SLois Curfman McInnes 1026154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1027154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1028154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 102938597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 103038597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1031154123eaSLois Curfman McInnes nztot = nzA + nzB; 1032154123eaSLois Curfman McInnes 103370f0671dSBarry Smith cmap = mat->garray; 1034154123eaSLois Curfman McInnes if (v || idx) { 1035154123eaSLois Curfman McInnes if (nztot) { 1036154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 103770f0671dSBarry Smith int imark = -1; 1038154123eaSLois Curfman McInnes if (v) { 103970f0671dSBarry Smith *v = v_p = mat->rowvalues; 104039e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 104170f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1042154123eaSLois Curfman McInnes else break; 1043154123eaSLois Curfman McInnes } 1044154123eaSLois Curfman McInnes imark = i; 104570f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 104670f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1047154123eaSLois Curfman McInnes } 1048154123eaSLois Curfman McInnes if (idx) { 104970f0671dSBarry Smith *idx = idx_p = mat->rowindices; 105070f0671dSBarry Smith if (imark > -1) { 105170f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 105270f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 105370f0671dSBarry Smith } 105470f0671dSBarry Smith } else { 1055154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 105670f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1057154123eaSLois Curfman McInnes else break; 1058154123eaSLois Curfman McInnes } 1059154123eaSLois Curfman McInnes imark = i; 106070f0671dSBarry Smith } 106170f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 106270f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 106339e00950SLois Curfman McInnes } 106439e00950SLois Curfman McInnes } 10651ca473b0SSatish Balay else { 10661ca473b0SSatish Balay if (idx) *idx = 0; 10671ca473b0SSatish Balay if (v) *v = 0; 10681ca473b0SSatish Balay } 1069154123eaSLois Curfman McInnes } 107039e00950SLois Curfman McInnes *nz = nztot; 107138597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 107238597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 107339e00950SLois Curfman McInnes return 0; 107439e00950SLois Curfman McInnes } 107539e00950SLois Curfman McInnes 1076*e0f21b7aSSatish Balay #undef __FUNCTION__ 1077*e0f21b7aSSatish Balay #define __FUNCTION__ "MatRestoreRow_MPIAIJ" 10786d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 107939e00950SLois Curfman McInnes { 10807a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 10817a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 10827a0afa10SBarry Smith SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 10837a0afa10SBarry Smith } 10847a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 108539e00950SLois Curfman McInnes return 0; 108639e00950SLois Curfman McInnes } 108739e00950SLois Curfman McInnes 1088*e0f21b7aSSatish Balay #undef __FUNCTION__ 1089*e0f21b7aSSatish Balay #define __FUNCTION__ "MatNorm_MPIAIJ" 1090cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1091855ac2c5SLois Curfman McInnes { 1092855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1093ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1094416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1095416022c9SBarry Smith double sum = 0.0; 109604ca555eSLois Curfman McInnes Scalar *v; 109704ca555eSLois Curfman McInnes 109817699dbbSLois Curfman McInnes if (aij->size == 1) { 109914183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 110037fa93a5SLois Curfman McInnes } else { 110104ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 110204ca555eSLois Curfman McInnes v = amat->a; 110304ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 110404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 110504ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 110604ca555eSLois Curfman McInnes #else 110704ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 110804ca555eSLois Curfman McInnes #endif 110904ca555eSLois Curfman McInnes } 111004ca555eSLois Curfman McInnes v = bmat->a; 111104ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 111204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 111304ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 111404ca555eSLois Curfman McInnes #else 111504ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 111604ca555eSLois Curfman McInnes #endif 111704ca555eSLois Curfman McInnes } 11186d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 111904ca555eSLois Curfman McInnes *norm = sqrt(*norm); 112004ca555eSLois Curfman McInnes } 112104ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 112204ca555eSLois Curfman McInnes double *tmp, *tmp2; 112304ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 11240452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 11250452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1126cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 112704ca555eSLois Curfman McInnes *norm = 0.0; 112804ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 112904ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1130579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 113104ca555eSLois Curfman McInnes } 113204ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 113304ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1134579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 113504ca555eSLois Curfman McInnes } 11366d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 113704ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 113804ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 113904ca555eSLois Curfman McInnes } 11400452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 114104ca555eSLois Curfman McInnes } 114204ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1143515d9167SLois Curfman McInnes double ntemp = 0.0; 114404ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1145dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 114604ca555eSLois Curfman McInnes sum = 0.0; 114704ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1148cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 114904ca555eSLois Curfman McInnes } 1150dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 115104ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1152cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 115304ca555eSLois Curfman McInnes } 1154515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 115504ca555eSLois Curfman McInnes } 11566d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 115704ca555eSLois Curfman McInnes } 115804ca555eSLois Curfman McInnes else { 1159515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 116004ca555eSLois Curfman McInnes } 116137fa93a5SLois Curfman McInnes } 1162855ac2c5SLois Curfman McInnes return 0; 1163855ac2c5SLois Curfman McInnes } 1164855ac2c5SLois Curfman McInnes 1165*e0f21b7aSSatish Balay #undef __FUNCTION__ 1166*e0f21b7aSSatish Balay #define __FUNCTION__ "MatTranspose_MPIAIJ" 11670de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1168b7c46309SBarry Smith { 1169b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1170dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1171416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1172416022c9SBarry Smith Mat B; 1173b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1174b7c46309SBarry Smith Scalar *array; 1175b7c46309SBarry Smith 11763638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 11773638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1178b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1179b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1180b7c46309SBarry Smith 1181b7c46309SBarry Smith /* copy over the A part */ 1182ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1183b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1184b7c46309SBarry Smith row = a->rstart; 1185dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1186b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1187416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1188b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1189b7c46309SBarry Smith } 1190b7c46309SBarry Smith aj = Aloc->j; 11914af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1192b7c46309SBarry Smith 1193b7c46309SBarry Smith /* copy over the B part */ 1194ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1195b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1196b7c46309SBarry Smith row = a->rstart; 11970452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1198dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1199b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1200416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1201b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1202b7c46309SBarry Smith } 12030452661fSBarry Smith PetscFree(ct); 12046d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12056d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12063638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 12070de55854SLois Curfman McInnes *matout = B; 12080de55854SLois Curfman McInnes } else { 12090de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12100452661fSBarry Smith PetscFree(a->rowners); 12110de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12120de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12130452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12140452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12150de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1216a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12170452661fSBarry Smith PetscFree(a); 1218416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12190452661fSBarry Smith PetscHeaderDestroy(B); 12200de55854SLois Curfman McInnes } 1221b7c46309SBarry Smith return 0; 1222b7c46309SBarry Smith } 1223b7c46309SBarry Smith 1224*e0f21b7aSSatish Balay #undef __FUNCTION__ 1225*e0f21b7aSSatish Balay #define __FUNCTION__ "MatDiagonalScale_MPIAIJ" 12264b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1227a008b906SSatish Balay { 12284b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 12294b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1230a008b906SSatish Balay int ierr,s1,s2,s3; 1231a008b906SSatish Balay 12324b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 12334b967eb1SSatish Balay if (rr) { 12344b967eb1SSatish Balay s3 = aij->n; 12354b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 12364b967eb1SSatish Balay if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size"); 12374b967eb1SSatish Balay /* Overlap communication with computation. */ 123843a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1239a008b906SSatish Balay } 12404b967eb1SSatish Balay if (ll) { 12414b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 12424b967eb1SSatish Balay if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size"); 1243c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 12444b967eb1SSatish Balay } 12454b967eb1SSatish Balay /* scale the diagonal block */ 1246c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 12474b967eb1SSatish Balay 12484b967eb1SSatish Balay if (rr) { 12494b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 125043a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1251c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 12524b967eb1SSatish Balay } 12534b967eb1SSatish Balay 1254a008b906SSatish Balay return 0; 1255a008b906SSatish Balay } 1256a008b906SSatish Balay 1257a008b906SSatish Balay 1258682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1259*e0f21b7aSSatish Balay #undef __FUNCTION__ 1260*e0f21b7aSSatish Balay #define __FUNCTION__ "MatPrintHelp_MPIAIJ" 1261682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1262682d7d0cSBarry Smith { 1263682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1264682d7d0cSBarry Smith 1265682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1266682d7d0cSBarry Smith else return 0; 1267682d7d0cSBarry Smith } 1268682d7d0cSBarry Smith 1269*e0f21b7aSSatish Balay #undef __FUNCTION__ 1270*e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetBlockSize_MPIAIJ" 12715a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 12725a838052SSatish Balay { 12735a838052SSatish Balay *bs = 1; 12745a838052SSatish Balay return 0; 12755a838052SSatish Balay } 12765a838052SSatish Balay 1277fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 12783d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 12792f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 12800a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 12810a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 12828a729477SBarry Smith /* -------------------------------------------------------------------*/ 12832ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 128439e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 128544a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 128644a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 128736ce4990SBarry Smith 0,0, 128836ce4990SBarry Smith 0,0, 128936ce4990SBarry Smith 0,0, 129044a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1291b7c46309SBarry Smith MatTranspose_MPIAIJ, 1292a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1293a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1294ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 12951eb62cbbSBarry Smith 0, 1296299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 129736ce4990SBarry Smith 0,0,0,0, 1298d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 129936ce4990SBarry Smith 0,0, 13003e85bedfSBarry Smith 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1301b49de8d1SLois Curfman McInnes 0,0,0, 1302598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1303052efed2SBarry Smith MatPrintHelp_MPIAIJ, 13043b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 13050a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 13060a198c4cSBarry Smith MatFDColoringCreate_MPIAIJ}; 130736ce4990SBarry Smith 13088a729477SBarry Smith 1309*e0f21b7aSSatish Balay #undef __FUNCTION__ 1310*e0f21b7aSSatish Balay #define __FUNCTION__ "MatCreateMPIAIJ" 13111987afe7SBarry Smith /*@C 1312ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 13133a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 13143a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 13153a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 13163a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 13178a729477SBarry Smith 13188a729477SBarry Smith Input Parameters: 13191eb62cbbSBarry Smith . comm - MPI communicator 13207d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 132192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 132292e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 132392e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 132492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 132592e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 13267d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 132792e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1328ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1329ff756334SLois Curfman McInnes (same for all local rows) 13302bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 133192e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 13322bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 13332bd5e0b2SLois Curfman McInnes it is zero. 13342bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1335ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 13362bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 13372bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 13382bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 13398a729477SBarry Smith 1340ff756334SLois Curfman McInnes Output Parameter: 134144cd7ae7SLois Curfman McInnes . A - the matrix 13428a729477SBarry Smith 1343ff756334SLois Curfman McInnes Notes: 1344ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1345ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 13460002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 13470002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1348ff756334SLois Curfman McInnes 1349ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1350ff756334SLois Curfman McInnes (possibly both). 1351ff756334SLois Curfman McInnes 13525511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 13535511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 13545511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 13555511cfe3SLois Curfman McInnes 13565511cfe3SLois Curfman McInnes Options Database Keys: 13575511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 13586e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 13596e62573dSLois Curfman McInnes $ (max limit=5) 13606e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 13616e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 13626e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 13635511cfe3SLois Curfman McInnes 1364e0245417SLois Curfman McInnes Storage Information: 1365e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1366e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1367e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1368e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1369e0245417SLois Curfman McInnes 1370e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 13715ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 13725ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 13735ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 13745ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1375ff756334SLois Curfman McInnes 13765511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 13775511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 13782191d07cSBarry Smith 1379b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1380b810aeb4SBarry Smith $ ------------------- 13815511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 13825511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 13835511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 13845511cfe3SLois Curfman McInnes $ ------------------- 1385b810aeb4SBarry Smith $ 1386b810aeb4SBarry Smith 13875511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 13885511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 13895511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 13905511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 13915511cfe3SLois Curfman McInnes 13925511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 13935511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 13945511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 13953d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 139692e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 13976da5968aSLois Curfman McInnes matrices. 13983a511b96SLois Curfman McInnes 1399dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1400ff756334SLois Curfman McInnes 1401fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 14028a729477SBarry Smith @*/ 14031eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 140444cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 14058a729477SBarry Smith { 140644cd7ae7SLois Curfman McInnes Mat B; 140744cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 140836ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1409416022c9SBarry Smith 141044cd7ae7SLois Curfman McInnes *A = 0; 141144cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 141244cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 141344cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 141444cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 141544cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 141636ce4990SBarry Smith MPI_Comm_size(comm,&size); 141736ce4990SBarry Smith if (size == 1) { 141836ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 141936ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 142036ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 142136ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 142236ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 142336ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 142436ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 142536ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 142636ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 142736ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 142836ce4990SBarry Smith } 142944cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 143044cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 143144cd7ae7SLois Curfman McInnes B->factor = 0; 143244cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 143390f02eecSBarry Smith B->mapping = 0; 1434d6dfbf8fSBarry Smith 143544cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 143644cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 143744cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 14381eb62cbbSBarry Smith 1439b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 14402e0155e0SLois Curfman McInnes SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 14411987afe7SBarry Smith 1442dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 14431eb62cbbSBarry Smith work[0] = m; work[1] = n; 1444d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1445dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1446dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 14471eb62cbbSBarry Smith } 144844cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 144944cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 145044cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 145144cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 145244cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 145344cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 14541eb62cbbSBarry Smith 14551eb62cbbSBarry Smith /* build local table of row and column ownerships */ 145644cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 145744cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1458603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 145944cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 146044cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 146144cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 146244cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 14638a729477SBarry Smith } 146444cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 146544cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 146644cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 146744cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 146844cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 146944cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 14708a729477SBarry Smith } 147144cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 147244cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 14738a729477SBarry Smith 14745ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 147544cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 147644cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 14777b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 147844cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 147944cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 14808a729477SBarry Smith 14811eb62cbbSBarry Smith /* build cache for off array entries formed */ 148244cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 148390f02eecSBarry Smith b->donotstash = 0; 148444cd7ae7SLois Curfman McInnes b->colmap = 0; 148544cd7ae7SLois Curfman McInnes b->garray = 0; 148644cd7ae7SLois Curfman McInnes b->roworiented = 1; 14878a729477SBarry Smith 14881eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 148944cd7ae7SLois Curfman McInnes b->lvec = 0; 149044cd7ae7SLois Curfman McInnes b->Mvctx = 0; 14918a729477SBarry Smith 14927a0afa10SBarry Smith /* stuff for MatGetRow() */ 149344cd7ae7SLois Curfman McInnes b->rowindices = 0; 149444cd7ae7SLois Curfman McInnes b->rowvalues = 0; 149544cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 14967a0afa10SBarry Smith 149744cd7ae7SLois Curfman McInnes *A = B; 1498d6dfbf8fSBarry Smith return 0; 1499d6dfbf8fSBarry Smith } 1500c74985f6SBarry Smith 1501*e0f21b7aSSatish Balay #undef __FUNCTION__ 1502*e0f21b7aSSatish Balay #define __FUNCTION__ "MatConvertSameType_MPIAIJ" 15033d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1504d6dfbf8fSBarry Smith { 1505d6dfbf8fSBarry Smith Mat mat; 1506416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1507a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1508d6dfbf8fSBarry Smith 1509416022c9SBarry Smith *newmat = 0; 15100452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1511a5a9c739SBarry Smith PLogObjectCreate(mat); 15120452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1513416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 151444a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 151544a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1516d6dfbf8fSBarry Smith mat->factor = matin->factor; 1517c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1518d6dfbf8fSBarry Smith 151944cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 152044cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 152144cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 152244cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1523d6dfbf8fSBarry Smith 1524416022c9SBarry Smith a->rstart = oldmat->rstart; 1525416022c9SBarry Smith a->rend = oldmat->rend; 1526416022c9SBarry Smith a->cstart = oldmat->cstart; 1527416022c9SBarry Smith a->cend = oldmat->cend; 152817699dbbSLois Curfman McInnes a->size = oldmat->size; 152917699dbbSLois Curfman McInnes a->rank = oldmat->rank; 153064119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1531bcd2baecSBarry Smith a->rowvalues = 0; 1532bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1533d6dfbf8fSBarry Smith 1534603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1535603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1536603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1537603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1538416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 15392ee70a88SLois Curfman McInnes if (oldmat->colmap) { 15400452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1541416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1542416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1543416022c9SBarry Smith } else a->colmap = 0; 15443f41c07dSBarry Smith if (oldmat->garray) { 15453f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 15463f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1547464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 15483f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1549416022c9SBarry Smith } else a->garray = 0; 1550d6dfbf8fSBarry Smith 1551416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1552416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1553a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1554416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1555416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1556416022c9SBarry Smith PLogObjectParent(mat,a->A); 1557416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1558416022c9SBarry Smith PLogObjectParent(mat,a->B); 15595dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 156025cdf11fSBarry Smith if (flg) { 1561682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1562682d7d0cSBarry Smith } 15638a729477SBarry Smith *newmat = mat; 15648a729477SBarry Smith return 0; 15658a729477SBarry Smith } 1566416022c9SBarry Smith 156777c4ece6SBarry Smith #include "sys.h" 1568416022c9SBarry Smith 1569*e0f21b7aSSatish Balay #undef __FUNCTION__ 1570*e0f21b7aSSatish Balay #define __FUNCTION__ "MatLoad_MPIAIJ" 157119bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1572416022c9SBarry Smith { 1573d65a2f8fSBarry Smith Mat A; 1574416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1575d65a2f8fSBarry Smith Scalar *vals,*svals; 157619bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1577416022c9SBarry Smith MPI_Status status; 157817699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1579d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 158019bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1581416022c9SBarry Smith 158217699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 158317699dbbSLois Curfman McInnes if (!rank) { 158490ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 158577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1586c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1587416022c9SBarry Smith } 1588416022c9SBarry Smith 1589416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1590416022c9SBarry Smith M = header[1]; N = header[2]; 1591416022c9SBarry Smith /* determine ownership of all rows */ 159217699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15930452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1594416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1595416022c9SBarry Smith rowners[0] = 0; 159617699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1597416022c9SBarry Smith rowners[i] += rowners[i-1]; 1598416022c9SBarry Smith } 159917699dbbSLois Curfman McInnes rstart = rowners[rank]; 160017699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1601416022c9SBarry Smith 1602416022c9SBarry Smith /* distribute row lengths to all processors */ 16030452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1604416022c9SBarry Smith offlens = ourlens + (rend-rstart); 160517699dbbSLois Curfman McInnes if (!rank) { 16060452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 160777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 16080452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 160917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1610d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 16110452661fSBarry Smith PetscFree(sndcounts); 1612416022c9SBarry Smith } 1613416022c9SBarry Smith else { 1614416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1615416022c9SBarry Smith } 1616416022c9SBarry Smith 161717699dbbSLois Curfman McInnes if (!rank) { 1618416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 16190452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1620cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 162117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1622416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1623416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1624416022c9SBarry Smith } 1625416022c9SBarry Smith } 16260452661fSBarry Smith PetscFree(rowlengths); 1627416022c9SBarry Smith 1628416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1629416022c9SBarry Smith maxnz = 0; 163017699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 16310452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1632416022c9SBarry Smith } 16330452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1634416022c9SBarry Smith 1635416022c9SBarry Smith /* read in my part of the matrix column indices */ 1636416022c9SBarry Smith nz = procsnz[0]; 16370452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 163877c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1639d65a2f8fSBarry Smith 1640d65a2f8fSBarry Smith /* read in every one elses and ship off */ 164117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1642d65a2f8fSBarry Smith nz = procsnz[i]; 164377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1644d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1645d65a2f8fSBarry Smith } 16460452661fSBarry Smith PetscFree(cols); 1647416022c9SBarry Smith } 1648416022c9SBarry Smith else { 1649416022c9SBarry Smith /* determine buffer space needed for message */ 1650416022c9SBarry Smith nz = 0; 1651416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1652416022c9SBarry Smith nz += ourlens[i]; 1653416022c9SBarry Smith } 16540452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1655416022c9SBarry Smith 1656416022c9SBarry Smith /* receive message of column indices*/ 1657d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1658416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1659c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1660416022c9SBarry Smith } 1661416022c9SBarry Smith 1662416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1663cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1664416022c9SBarry Smith jj = 0; 1665416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1666416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1667d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1668416022c9SBarry Smith jj++; 1669416022c9SBarry Smith } 1670416022c9SBarry Smith } 1671d65a2f8fSBarry Smith 1672d65a2f8fSBarry Smith /* create our matrix */ 1673416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1674416022c9SBarry Smith ourlens[i] -= offlens[i]; 1675416022c9SBarry Smith } 1676d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1677d65a2f8fSBarry Smith A = *newmat; 16786d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1679d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1680d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1681d65a2f8fSBarry Smith } 1682416022c9SBarry Smith 168317699dbbSLois Curfman McInnes if (!rank) { 16840452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1685416022c9SBarry Smith 1686416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1687416022c9SBarry Smith nz = procsnz[0]; 168877c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1689d65a2f8fSBarry Smith 1690d65a2f8fSBarry Smith /* insert into matrix */ 1691d65a2f8fSBarry Smith jj = rstart; 1692d65a2f8fSBarry Smith smycols = mycols; 1693d65a2f8fSBarry Smith svals = vals; 1694d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1695d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1696d65a2f8fSBarry Smith smycols += ourlens[i]; 1697d65a2f8fSBarry Smith svals += ourlens[i]; 1698d65a2f8fSBarry Smith jj++; 1699416022c9SBarry Smith } 1700416022c9SBarry Smith 1701d65a2f8fSBarry Smith /* read in other processors and ship out */ 170217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1703416022c9SBarry Smith nz = procsnz[i]; 170477c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1705d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1706416022c9SBarry Smith } 17070452661fSBarry Smith PetscFree(procsnz); 1708416022c9SBarry Smith } 1709d65a2f8fSBarry Smith else { 1710d65a2f8fSBarry Smith /* receive numeric values */ 17110452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1712416022c9SBarry Smith 1713d65a2f8fSBarry Smith /* receive message of values*/ 1714d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1715d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1716c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1717d65a2f8fSBarry Smith 1718d65a2f8fSBarry Smith /* insert into matrix */ 1719d65a2f8fSBarry Smith jj = rstart; 1720d65a2f8fSBarry Smith smycols = mycols; 1721d65a2f8fSBarry Smith svals = vals; 1722d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1723d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1724d65a2f8fSBarry Smith smycols += ourlens[i]; 1725d65a2f8fSBarry Smith svals += ourlens[i]; 1726d65a2f8fSBarry Smith jj++; 1727d65a2f8fSBarry Smith } 1728d65a2f8fSBarry Smith } 17290452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1730d65a2f8fSBarry Smith 17316d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 17326d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1733416022c9SBarry Smith return 0; 1734416022c9SBarry Smith } 1735