1cb512458SBarry Smith #ifndef lint 2*47794344SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.191 1997/02/22 02:25:15 bsmith Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 7d9942c19SSatish Balay #include "src/inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 145615d1e5SSatish Balay #undef __FUNC__ 155eea60f9SBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */ 160a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 179e25ed09SBarry Smith { 1844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20905e6a2fSBarry Smith int n = B->n,i; 21dbb450caSBarry Smith 220452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 23464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 24cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 25905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 269e25ed09SBarry Smith return 0; 279e25ed09SBarry Smith } 289e25ed09SBarry Smith 292493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 302493cbb0SBarry Smith 315615d1e5SSatish Balay #undef __FUNC__ 325615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ" 333b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 343b2fbd54SBarry Smith PetscTruth *done) 35299609e3SLois Curfman McInnes { 36299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 37299609e3SLois Curfman McInnes int ierr; 3817699dbbSLois Curfman McInnes if (aij->size == 1) { 393b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 40e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 413b2fbd54SBarry Smith return 0; 423b2fbd54SBarry Smith } 433b2fbd54SBarry Smith 445615d1e5SSatish Balay #undef __FUNC__ 455eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */ 463b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 473b2fbd54SBarry Smith PetscTruth *done) 483b2fbd54SBarry Smith { 493b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 503b2fbd54SBarry Smith int ierr; 513b2fbd54SBarry Smith if (aij->size == 1) { 523b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 53e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 54299609e3SLois Curfman McInnes return 0; 55299609e3SLois Curfman McInnes } 56299609e3SLois Curfman McInnes 570520107fSSatish Balay #define CHUNKSIZE 15 58f5e9677aSSatish Balay #define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \ 590520107fSSatish Balay { \ 600520107fSSatish Balay \ 610520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 620520107fSSatish Balay rmax = imax[row]; nrow = ailen[row]; \ 63f5e9677aSSatish Balay col1 = col - shift; \ 64f5e9677aSSatish Balay \ 650520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 66f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 67f5e9677aSSatish Balay if (rp[_i] == col1) { \ 680520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 690520107fSSatish Balay else ap[_i] = value; \ 700520107fSSatish Balay goto _noinsert; \ 710520107fSSatish Balay } \ 720520107fSSatish Balay } \ 73f5e9677aSSatish Balay if (nonew) goto _noinsert; \ 740520107fSSatish Balay if (nrow >= rmax) { \ 750520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 760520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 770520107fSSatish Balay Scalar *new_a; \ 780520107fSSatish Balay \ 790520107fSSatish Balay /* malloc new storage space */ \ 800520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 810520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 820520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 830520107fSSatish Balay new_i = new_j + new_nz; \ 840520107fSSatish Balay \ 850520107fSSatish Balay /* copy over old data into new slots */ \ 860520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 870520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 880520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 890520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 900520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 910520107fSSatish Balay len*sizeof(int)); \ 920520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 930520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 940520107fSSatish Balay len*sizeof(Scalar)); \ 950520107fSSatish Balay /* free up old matrix storage */ \ 96f5e9677aSSatish Balay \ 970520107fSSatish Balay PetscFree(a->a); \ 980520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 990520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1000520107fSSatish Balay a->singlemalloc = 1; \ 1010520107fSSatish Balay \ 1020520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 1030520107fSSatish Balay rmax = imax[row] = imax[row] + CHUNKSIZE; \ 1040520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 1050520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 1060520107fSSatish Balay a->reallocs++; \ 1070520107fSSatish Balay } \ 1080520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 1090520107fSSatish Balay /* shift up all the later entries in this row */ \ 1100520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 1110520107fSSatish Balay rp[ii+1] = rp[ii]; \ 1120520107fSSatish Balay ap[ii+1] = ap[ii]; \ 1130520107fSSatish Balay } \ 114f5e9677aSSatish Balay rp[_i] = col1; \ 1150520107fSSatish Balay ap[_i] = value; \ 1160520107fSSatish Balay _noinsert: ; \ 1170520107fSSatish Balay ailen[row] = nrow; \ 1180520107fSSatish Balay } 1190a198c4cSBarry Smith 1200520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1215615d1e5SSatish Balay #undef __FUNC__ 1225615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 1234b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 1248a729477SBarry Smith { 12544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1264b0e389bSBarry Smith Scalar value; 1271eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 1281eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 129905e6a2fSBarry Smith int roworiented = aij->roworiented; 1308a729477SBarry Smith 1310520107fSSatish Balay /* Some Variables required in the macro */ 1324ee7247eSSatish Balay Mat A = aij->A; 1334ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 1344ee7247eSSatish Balay int *rp,ii,nrow,_i,rmax, N, col1; 1350520107fSSatish Balay int *imax = a->imax, *ai = a->i, *ailen = a->ilen; 1364ee7247eSSatish Balay int *aj = a->j, nonew = a->nonew,shift = a->indexshift; 1370520107fSSatish Balay Scalar *ap, *aa = a->a; 1384ee7247eSSatish Balay 1398a729477SBarry Smith for ( i=0; i<m; i++ ) { 1400a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 141e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 142e3372554SBarry Smith if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 1430a198c4cSBarry Smith #endif 1444b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 1454b0e389bSBarry Smith row = im[i] - rstart; 1461eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 1474b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 1484b0e389bSBarry Smith col = in[j] - cstart; 1494b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 150f5e9677aSSatish Balay MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv); 1510520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 1521eb62cbbSBarry Smith } 1530a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 154e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 155e3372554SBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 1560a198c4cSBarry Smith #endif 1571eb62cbbSBarry Smith else { 158227d817aSBarry Smith if (mat->was_assembled) { 159905e6a2fSBarry Smith if (!aij->colmap) { 160905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 161905e6a2fSBarry Smith } 162905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 163ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 1642493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 1654b0e389bSBarry Smith col = in[j]; 166d6dfbf8fSBarry Smith } 1679e25ed09SBarry Smith } 1684b0e389bSBarry Smith else col = in[j]; 1694b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 1700a198c4cSBarry Smith ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 1711eb62cbbSBarry Smith } 1721eb62cbbSBarry Smith } 1731eb62cbbSBarry Smith } 1741eb62cbbSBarry Smith else { 17590f02eecSBarry Smith if (roworiented && !aij->donotstash) { 1764b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 1774b0e389bSBarry Smith } 1784b0e389bSBarry Smith else { 17990f02eecSBarry Smith if (!aij->donotstash) { 1804b0e389bSBarry Smith row = im[i]; 1814b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 1824b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 1834b0e389bSBarry Smith } 1844b0e389bSBarry Smith } 1851eb62cbbSBarry Smith } 1868a729477SBarry Smith } 18790f02eecSBarry Smith } 1888a729477SBarry Smith return 0; 1898a729477SBarry Smith } 1908a729477SBarry Smith 1915615d1e5SSatish Balay #undef __FUNC__ 1925615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 193b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 194b49de8d1SLois Curfman McInnes { 195b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 196b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 197b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 198b49de8d1SLois Curfman McInnes 199b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 200e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 201e3372554SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 202b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 203b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 204b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 205e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 206e3372554SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 207b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 208b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 209b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 210b49de8d1SLois Curfman McInnes } 211b49de8d1SLois Curfman McInnes else { 212905e6a2fSBarry Smith if (!aij->colmap) { 213905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 214905e6a2fSBarry Smith } 215905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 216e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 217d9d09a02SSatish Balay else { 218b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 219b49de8d1SLois Curfman McInnes } 220b49de8d1SLois Curfman McInnes } 221b49de8d1SLois Curfman McInnes } 222d9d09a02SSatish Balay } 223b49de8d1SLois Curfman McInnes else { 224e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 225b49de8d1SLois Curfman McInnes } 226b49de8d1SLois Curfman McInnes } 227b49de8d1SLois Curfman McInnes return 0; 228b49de8d1SLois Curfman McInnes } 229b49de8d1SLois Curfman McInnes 2305615d1e5SSatish Balay #undef __FUNC__ 2315615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 232fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 2338a729477SBarry Smith { 23444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 235d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 23617699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 23717699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 2381eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2396abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 2401eb62cbbSBarry Smith InsertMode addv; 2411eb62cbbSBarry Smith Scalar *rvalues,*svalues; 2421eb62cbbSBarry Smith 2431eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 244*47794344SBarry Smith MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 245dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 246e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 2471eb62cbbSBarry Smith } 248*47794344SBarry Smith mat->insertmode = addv; /* in case this processor had no cache */ 2491eb62cbbSBarry Smith 2501eb62cbbSBarry Smith /* first count number of contributors to each processor */ 2510452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 252cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2530452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 2541eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2551eb62cbbSBarry Smith idx = aij->stash.idx[i]; 25617699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 2571eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 2581eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 2598a729477SBarry Smith } 2608a729477SBarry Smith } 2618a729477SBarry Smith } 26217699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2631eb62cbbSBarry Smith 2641eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 2650452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 26617699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 26717699dbbSLois Curfman McInnes nreceives = work[rank]; 26817699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 26917699dbbSLois Curfman McInnes nmax = work[rank]; 2700452661fSBarry Smith PetscFree(work); 2711eb62cbbSBarry Smith 2721eb62cbbSBarry Smith /* post receives: 2731eb62cbbSBarry Smith 1) each message will consist of ordered pairs 2741eb62cbbSBarry Smith (global index,value) we store the global index as a double 275d6dfbf8fSBarry Smith to simplify the message passing. 2761eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 2771eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 2781eb62cbbSBarry Smith this is a lot of wasted space. 2791eb62cbbSBarry Smith 2801eb62cbbSBarry Smith 2811eb62cbbSBarry Smith This could be done better. 2821eb62cbbSBarry Smith */ 2830452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 28478b31e54SBarry Smith CHKPTRQ(rvalues); 2850452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 28678b31e54SBarry Smith CHKPTRQ(recv_waits); 2871eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 288416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 2891eb62cbbSBarry Smith comm,recv_waits+i); 2901eb62cbbSBarry Smith } 2911eb62cbbSBarry Smith 2921eb62cbbSBarry Smith /* do sends: 2931eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 2941eb62cbbSBarry Smith the ith processor 2951eb62cbbSBarry Smith */ 2960452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 2970452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 29878b31e54SBarry Smith CHKPTRQ(send_waits); 2990452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3001eb62cbbSBarry Smith starts[0] = 0; 30117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3021eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3031eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 3041eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 3051eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 3061eb62cbbSBarry Smith } 3070452661fSBarry Smith PetscFree(owner); 3081eb62cbbSBarry Smith starts[0] = 0; 30917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3101eb62cbbSBarry Smith count = 0; 31117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3121eb62cbbSBarry Smith if (procs[i]) { 313416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 3141eb62cbbSBarry Smith comm,send_waits+count++); 3151eb62cbbSBarry Smith } 3161eb62cbbSBarry Smith } 3170452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 3181eb62cbbSBarry Smith 3191eb62cbbSBarry Smith /* Free cache space */ 32055908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 32178b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 3221eb62cbbSBarry Smith 3231eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 3241eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 3251eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 3261eb62cbbSBarry Smith aij->rmax = nmax; 3271eb62cbbSBarry Smith 3281eb62cbbSBarry Smith return 0; 3291eb62cbbSBarry Smith } 33044a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 3311eb62cbbSBarry Smith 3325615d1e5SSatish Balay #undef __FUNC__ 3335615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 334fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 3351eb62cbbSBarry Smith { 33644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 3371eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 338416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 339905e6a2fSBarry Smith int row,col,other_disassembled; 3401eb62cbbSBarry Smith Scalar *values,val; 341*47794344SBarry Smith InsertMode addv = mat->insertmode; 3421eb62cbbSBarry Smith 3431eb62cbbSBarry Smith /* wait on receives */ 3441eb62cbbSBarry Smith while (count) { 345d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 3461eb62cbbSBarry Smith /* unpack receives into our local space */ 347d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 348c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 3491eb62cbbSBarry Smith n = n/3; 3501eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 351227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 352227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 3531eb62cbbSBarry Smith val = values[3*i+2]; 3541eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 3551eb62cbbSBarry Smith col -= aij->cstart; 3561eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 3571eb62cbbSBarry Smith } 3581eb62cbbSBarry Smith else { 35955a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 360905e6a2fSBarry Smith if (!aij->colmap) { 361905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 362905e6a2fSBarry Smith } 363905e6a2fSBarry Smith col = aij->colmap[col] - 1; 364ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 3652493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 366227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 367d6dfbf8fSBarry Smith } 3689e25ed09SBarry Smith } 3691eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 3701eb62cbbSBarry Smith } 3711eb62cbbSBarry Smith } 3721eb62cbbSBarry Smith count--; 3731eb62cbbSBarry Smith } 3740452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 3751eb62cbbSBarry Smith 3761eb62cbbSBarry Smith /* wait on sends */ 3771eb62cbbSBarry Smith if (aij->nsends) { 3780a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 3791eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 3800452661fSBarry Smith PetscFree(send_status); 3811eb62cbbSBarry Smith } 3820452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 3831eb62cbbSBarry Smith 38478b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 38578b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 3861eb62cbbSBarry Smith 3872493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 3882493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 389227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 390227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 3912493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 3922493cbb0SBarry Smith } 3932493cbb0SBarry Smith 3946d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 39578b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 3965e42470aSBarry Smith } 39778b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 39878b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 3995e42470aSBarry Smith 4007a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 4018a729477SBarry Smith return 0; 4028a729477SBarry Smith } 4038a729477SBarry Smith 4045615d1e5SSatish Balay #undef __FUNC__ 4055615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 4062ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 4071eb62cbbSBarry Smith { 40844a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 409dbd7a890SLois Curfman McInnes int ierr; 41078b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 41178b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 4121eb62cbbSBarry Smith return 0; 4131eb62cbbSBarry Smith } 4141eb62cbbSBarry Smith 4151eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 4161eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 4171eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 4181eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 4191eb62cbbSBarry Smith routine. 4201eb62cbbSBarry Smith */ 4215615d1e5SSatish Balay #undef __FUNC__ 4225615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 42344a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 4241eb62cbbSBarry Smith { 42544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 42617699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 4276abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 42817699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 4295392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 430d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 431d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 4321eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 4331eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 4341eb62cbbSBarry Smith IS istmp; 4351eb62cbbSBarry Smith 43677c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 43778b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 4381eb62cbbSBarry Smith 4391eb62cbbSBarry Smith /* first count number of contributors to each processor */ 4400452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 441cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 4420452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 4431eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4441eb62cbbSBarry Smith idx = rows[i]; 4451eb62cbbSBarry Smith found = 0; 44617699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 4471eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 4481eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 4491eb62cbbSBarry Smith } 4501eb62cbbSBarry Smith } 451e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 4521eb62cbbSBarry Smith } 45317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 4541eb62cbbSBarry Smith 4551eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 4560452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 45717699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 45817699dbbSLois Curfman McInnes nrecvs = work[rank]; 45917699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 46017699dbbSLois Curfman McInnes nmax = work[rank]; 4610452661fSBarry Smith PetscFree(work); 4621eb62cbbSBarry Smith 4631eb62cbbSBarry Smith /* post receives: */ 4640452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 46578b31e54SBarry Smith CHKPTRQ(rvalues); 4660452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 46778b31e54SBarry Smith CHKPTRQ(recv_waits); 4681eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 469416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 4701eb62cbbSBarry Smith } 4711eb62cbbSBarry Smith 4721eb62cbbSBarry Smith /* do sends: 4731eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 4741eb62cbbSBarry Smith the ith processor 4751eb62cbbSBarry Smith */ 4760452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 4770452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 47878b31e54SBarry Smith CHKPTRQ(send_waits); 4790452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 4801eb62cbbSBarry Smith starts[0] = 0; 48117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4821eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4831eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 4841eb62cbbSBarry Smith } 4851eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 4861eb62cbbSBarry Smith 4871eb62cbbSBarry Smith starts[0] = 0; 48817699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4891eb62cbbSBarry Smith count = 0; 49017699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 4911eb62cbbSBarry Smith if (procs[i]) { 492416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 4931eb62cbbSBarry Smith } 4941eb62cbbSBarry Smith } 4950452661fSBarry Smith PetscFree(starts); 4961eb62cbbSBarry Smith 49717699dbbSLois Curfman McInnes base = owners[rank]; 4981eb62cbbSBarry Smith 4991eb62cbbSBarry Smith /* wait on receives */ 5000452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 5011eb62cbbSBarry Smith source = lens + nrecvs; 5021eb62cbbSBarry Smith count = nrecvs; slen = 0; 5031eb62cbbSBarry Smith while (count) { 504d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 5051eb62cbbSBarry Smith /* unpack receives into our local space */ 5061eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 507d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 508d6dfbf8fSBarry Smith lens[imdex] = n; 5091eb62cbbSBarry Smith slen += n; 5101eb62cbbSBarry Smith count--; 5111eb62cbbSBarry Smith } 5120452661fSBarry Smith PetscFree(recv_waits); 5131eb62cbbSBarry Smith 5141eb62cbbSBarry Smith /* move the data into the send scatter */ 5150452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 5161eb62cbbSBarry Smith count = 0; 5171eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 5181eb62cbbSBarry Smith values = rvalues + i*nmax; 5191eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 5201eb62cbbSBarry Smith lrows[count++] = values[j] - base; 5211eb62cbbSBarry Smith } 5221eb62cbbSBarry Smith } 5230452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 5240452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 5251eb62cbbSBarry Smith 5261eb62cbbSBarry Smith /* actually zap the local rows */ 527537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 528464493b3SBarry Smith PLogObjectParent(A,istmp); 5290452661fSBarry Smith PetscFree(lrows); 53078b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 53178b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 53278b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 5331eb62cbbSBarry Smith 5341eb62cbbSBarry Smith /* wait on sends */ 5351eb62cbbSBarry Smith if (nsends) { 5360452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 53778b31e54SBarry Smith CHKPTRQ(send_status); 5381eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 5390452661fSBarry Smith PetscFree(send_status); 5401eb62cbbSBarry Smith } 5410452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 5421eb62cbbSBarry Smith 5431eb62cbbSBarry Smith return 0; 5441eb62cbbSBarry Smith } 5451eb62cbbSBarry Smith 5465615d1e5SSatish Balay #undef __FUNC__ 5475615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 548416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 5491eb62cbbSBarry Smith { 550416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 551fbd6ef76SBarry Smith int ierr,nt; 552416022c9SBarry Smith 553a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 554fbd6ef76SBarry Smith if (nt != a->n) { 555f40265daSLois Curfman McInnes SETERRQ(1,0,"Incompatible partition of A and xx"); 556fbd6ef76SBarry Smith } 55743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 55838597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 55943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 56038597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 5611eb62cbbSBarry Smith return 0; 5621eb62cbbSBarry Smith } 5631eb62cbbSBarry Smith 5645615d1e5SSatish Balay #undef __FUNC__ 5655615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 566416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 567da3a660dSBarry Smith { 568416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 569da3a660dSBarry Smith int ierr; 57043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 57138597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 57243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 57338597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 574da3a660dSBarry Smith return 0; 575da3a660dSBarry Smith } 576da3a660dSBarry Smith 5775615d1e5SSatish Balay #undef __FUNC__ 5785615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 579416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 580da3a660dSBarry Smith { 581416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 582da3a660dSBarry Smith int ierr; 583da3a660dSBarry Smith 584da3a660dSBarry Smith /* do nondiagonal part */ 58538597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 586da3a660dSBarry Smith /* send it on its way */ 587537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 588da3a660dSBarry Smith /* do local part */ 58938597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 590da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 591da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 592da3a660dSBarry Smith /* but is not perhaps always true. */ 593537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 594da3a660dSBarry Smith return 0; 595da3a660dSBarry Smith } 596da3a660dSBarry Smith 5975615d1e5SSatish Balay #undef __FUNC__ 5985615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 599416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600da3a660dSBarry Smith { 601416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 602da3a660dSBarry Smith int ierr; 603da3a660dSBarry Smith 604da3a660dSBarry Smith /* do nondiagonal part */ 60538597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 606da3a660dSBarry Smith /* send it on its way */ 607537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 608da3a660dSBarry Smith /* do local part */ 60938597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 610da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 611da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 612da3a660dSBarry Smith /* but is not perhaps always true. */ 6130a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 614da3a660dSBarry Smith return 0; 615da3a660dSBarry Smith } 616da3a660dSBarry Smith 6171eb62cbbSBarry Smith /* 6181eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 6191eb62cbbSBarry Smith diagonal block 6201eb62cbbSBarry Smith */ 6215615d1e5SSatish Balay #undef __FUNC__ 6225615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 623416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 6241eb62cbbSBarry Smith { 625416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 62644cd7ae7SLois Curfman McInnes if (a->M != a->N) 627e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 6285baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 629e3372554SBarry Smith SETERRQ(1,0,"row partition must equal col partition"); } 630416022c9SBarry Smith return MatGetDiagonal(a->A,v); 6311eb62cbbSBarry Smith } 6321eb62cbbSBarry Smith 6335615d1e5SSatish Balay #undef __FUNC__ 6345615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 635052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 636052efed2SBarry Smith { 637052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 638052efed2SBarry Smith int ierr; 639052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 640052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 641052efed2SBarry Smith return 0; 642052efed2SBarry Smith } 643052efed2SBarry Smith 6445615d1e5SSatish Balay #undef __FUNC__ 6455eea60f9SBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 64644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 6471eb62cbbSBarry Smith { 6481eb62cbbSBarry Smith Mat mat = (Mat) obj; 64944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 6501eb62cbbSBarry Smith int ierr; 651a5a9c739SBarry Smith #if defined(PETSC_LOG) 6526e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 653a5a9c739SBarry Smith #endif 6540452661fSBarry Smith PetscFree(aij->rowners); 65578b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 65678b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 6570452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 6580452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 6591eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 660a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 6617a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 6620452661fSBarry Smith PetscFree(aij); 66390f02eecSBarry Smith if (mat->mapping) { 66490f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 66590f02eecSBarry Smith } 666a5a9c739SBarry Smith PLogObjectDestroy(mat); 6670452661fSBarry Smith PetscHeaderDestroy(mat); 6681eb62cbbSBarry Smith return 0; 6691eb62cbbSBarry Smith } 6707857610eSBarry Smith #include "draw.h" 671b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 672ee50ffe9SBarry Smith 6735615d1e5SSatish Balay #undef __FUNC__ 6745eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 675416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 6761eb62cbbSBarry Smith { 677416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 678416022c9SBarry Smith int ierr; 679416022c9SBarry Smith 68017699dbbSLois Curfman McInnes if (aij->size == 1) { 681416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 682416022c9SBarry Smith } 683e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 684416022c9SBarry Smith return 0; 685416022c9SBarry Smith } 686416022c9SBarry Smith 6875615d1e5SSatish Balay #undef __FUNC__ 6885eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 689d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 690416022c9SBarry Smith { 69144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 692dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 693a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 694d636dbe3SBarry Smith FILE *fd; 69519bcc07fSBarry Smith ViewerType vtype; 696416022c9SBarry Smith 69719bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 69819bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 69990ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 7000a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7014e220ebcSLois Curfman McInnes MatInfo info; 7024e220ebcSLois Curfman McInnes int flg; 703a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 70490ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7054e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 70695e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 70777c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 70895e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 7094e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 71095e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 7114e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 7124e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 7134e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 7144e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 7154e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 716a56f8943SBarry Smith fflush(fd); 71777c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 718a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 719416022c9SBarry Smith return 0; 720416022c9SBarry Smith } 7210a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 72208480c60SBarry Smith return 0; 72308480c60SBarry Smith } 724416022c9SBarry Smith } 725416022c9SBarry Smith 72619bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 72719bcc07fSBarry Smith Draw draw; 72819bcc07fSBarry Smith PetscTruth isnull; 72919bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 73019bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 73119bcc07fSBarry Smith } 73219bcc07fSBarry Smith 73319bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 73490ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 73577c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 736d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 73717699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 7381eb62cbbSBarry Smith aij->cend); 73978b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 74078b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 741d13ab20cSBarry Smith fflush(fd); 74277c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 743d13ab20cSBarry Smith } 744416022c9SBarry Smith else { 745a56f8943SBarry Smith int size = aij->size; 746a56f8943SBarry Smith rank = aij->rank; 74717699dbbSLois Curfman McInnes if (size == 1) { 74878b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 74995373324SBarry Smith } 75095373324SBarry Smith else { 75195373324SBarry Smith /* assemble the entire matrix onto first processor. */ 75295373324SBarry Smith Mat A; 753ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 7542eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 75595373324SBarry Smith Scalar *a; 7562ee70a88SLois Curfman McInnes 75717699dbbSLois Curfman McInnes if (!rank) { 758b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 759c451ab25SLois Curfman McInnes CHKERRQ(ierr); 76095373324SBarry Smith } 76195373324SBarry Smith else { 762b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 763c451ab25SLois Curfman McInnes CHKERRQ(ierr); 76495373324SBarry Smith } 765464493b3SBarry Smith PLogObjectParent(mat,A); 766416022c9SBarry Smith 76795373324SBarry Smith /* copy over the A part */ 768ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 7692ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 77095373324SBarry Smith row = aij->rstart; 771dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 77295373324SBarry Smith for ( i=0; i<m; i++ ) { 773416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 77495373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 77595373324SBarry Smith } 7762ee70a88SLois Curfman McInnes aj = Aloc->j; 777dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 77895373324SBarry Smith 77995373324SBarry Smith /* copy over the B part */ 780ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 7812ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 78295373324SBarry Smith row = aij->rstart; 7830452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 784dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 78595373324SBarry Smith for ( i=0; i<m; i++ ) { 786416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 78795373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 78895373324SBarry Smith } 7890452661fSBarry Smith PetscFree(ct); 7906d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7916d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 79217699dbbSLois Curfman McInnes if (!rank) { 79378b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 79495373324SBarry Smith } 79578b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 79695373324SBarry Smith } 79795373324SBarry Smith } 7981eb62cbbSBarry Smith return 0; 7991eb62cbbSBarry Smith } 8001eb62cbbSBarry Smith 8015615d1e5SSatish Balay #undef __FUNC__ 8025eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */ 803416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 804416022c9SBarry Smith { 805416022c9SBarry Smith Mat mat = (Mat) obj; 806416022c9SBarry Smith int ierr; 80719bcc07fSBarry Smith ViewerType vtype; 808416022c9SBarry Smith 80919bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 81019bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 81119bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 812d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 813416022c9SBarry Smith } 81419bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 815416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 816416022c9SBarry Smith } 817416022c9SBarry Smith return 0; 818416022c9SBarry Smith } 819416022c9SBarry Smith 8201eb62cbbSBarry Smith /* 8211eb62cbbSBarry Smith This has to provide several versions. 8221eb62cbbSBarry Smith 8231eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 8241eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 825d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 8261eb62cbbSBarry Smith */ 8275615d1e5SSatish Balay #undef __FUNC__ 8285615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 829fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 830dbb450caSBarry Smith double fshift,int its,Vec xx) 8318a729477SBarry Smith { 83244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 833d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 834ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 835c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 8366abc6512SBarry Smith int ierr,*idx, *diag; 837416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 8388a729477SBarry Smith 839d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 840dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 841dbb450caSBarry Smith ls = ls + shift; 842ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 843d6dfbf8fSBarry Smith diag = A->diag; 844c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 845da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 84638597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 847da3a660dSBarry Smith } 84843a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 84978b31e54SBarry Smith CHKERRQ(ierr); 85043a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 85178b31e54SBarry Smith CHKERRQ(ierr); 852d6dfbf8fSBarry Smith while (its--) { 853d6dfbf8fSBarry Smith /* go down through the rows */ 854d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 855d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 856dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 857dbb450caSBarry Smith v = A->a + A->i[i] + shift; 858d6dfbf8fSBarry Smith sum = b[i]; 859d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 860dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 861d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 862dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 863dbb450caSBarry Smith v = B->a + B->i[i] + shift; 864d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 86555a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 866d6dfbf8fSBarry Smith } 867d6dfbf8fSBarry Smith /* come up through the rows */ 868d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 869d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 870dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 871dbb450caSBarry Smith v = A->a + A->i[i] + shift; 872d6dfbf8fSBarry Smith sum = b[i]; 873d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 874dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 875d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 876dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 877dbb450caSBarry Smith v = B->a + B->i[i] + shift; 878d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 87955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 880d6dfbf8fSBarry Smith } 881d6dfbf8fSBarry Smith } 882d6dfbf8fSBarry Smith } 883d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 884da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 88538597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 886da3a660dSBarry Smith } 88743a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 88878b31e54SBarry Smith CHKERRQ(ierr); 88943a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 89078b31e54SBarry Smith CHKERRQ(ierr); 891d6dfbf8fSBarry Smith while (its--) { 892d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 893d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 894dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 895dbb450caSBarry Smith v = A->a + A->i[i] + shift; 896d6dfbf8fSBarry Smith sum = b[i]; 897d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 898dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 899d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 900dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 901dbb450caSBarry Smith v = B->a + B->i[i] + shift; 902d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 90355a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 904d6dfbf8fSBarry Smith } 905d6dfbf8fSBarry Smith } 906d6dfbf8fSBarry Smith } 907d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 908da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 90938597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 910da3a660dSBarry Smith } 91143a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 91278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 91343a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 91478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 915d6dfbf8fSBarry Smith while (its--) { 916d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 917d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 918dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 919dbb450caSBarry Smith v = A->a + A->i[i] + shift; 920d6dfbf8fSBarry Smith sum = b[i]; 921d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 922dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 923d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 924dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 925dbb450caSBarry Smith v = B->a + B->i[i] + shift; 926d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 92755a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 928d6dfbf8fSBarry Smith } 929d6dfbf8fSBarry Smith } 930d6dfbf8fSBarry Smith } 931c16cb8f2SBarry Smith else { 932e3372554SBarry Smith SETERRQ(1,0,"Option not supported"); 933c16cb8f2SBarry Smith } 9348a729477SBarry Smith return 0; 9358a729477SBarry Smith } 936a66be287SLois Curfman McInnes 9375615d1e5SSatish Balay #undef __FUNC__ 9385eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */ 9394e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 940a66be287SLois Curfman McInnes { 941a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 942a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 9434e220ebcSLois Curfman McInnes int ierr; 9444e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 945a66be287SLois Curfman McInnes 9464e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 9474e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 9484e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 9494e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 9504e220ebcSLois Curfman McInnes info->block_size = 1.0; 9514e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 9524e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 9534e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 9544e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 9554e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 9564e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 957a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 9584e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 9594e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 9604e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 9614e220ebcSLois Curfman McInnes info->memory = isend[3]; 9624e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 963a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 9644e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 9654e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9664e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9674e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9684e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9694e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 970a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 9714e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 9724e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9734e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9744e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9754e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9764e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 977a66be287SLois Curfman McInnes } 9784e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 9794e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 9804e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 9814e220ebcSLois Curfman McInnes 982a66be287SLois Curfman McInnes return 0; 983a66be287SLois Curfman McInnes } 984a66be287SLois Curfman McInnes 985299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 986299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 987299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 988299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 989299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 990299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 991299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 992299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 993299609e3SLois Curfman McInnes 9945615d1e5SSatish Balay #undef __FUNC__ 9955eea60f9SBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */ 996416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 997c74985f6SBarry Smith { 998c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 999c74985f6SBarry Smith 10006d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10016d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10026da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1003b1fbbac0SLois Curfman McInnes op == MAT_COLUMNS_SORTED) { 1004b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1005b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1006b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1007aeafbbfcSLois Curfman McInnes a->roworiented = 1; 1008c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1009c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1010b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 10116da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 10126d4a8577SBarry Smith op == MAT_SYMMETRIC || 10136d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 10146d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 101594a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10166d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 10174b0e389bSBarry Smith a->roworiented = 0; 10184b0e389bSBarry Smith MatSetOption(a->A,op); 10194b0e389bSBarry Smith MatSetOption(a->B,op); 102090f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 102190f02eecSBarry Smith a->donotstash = 1; 102290f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1023e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1024c0bbcb79SLois Curfman McInnes else 1025e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1026c74985f6SBarry Smith return 0; 1027c74985f6SBarry Smith } 1028c74985f6SBarry Smith 10295615d1e5SSatish Balay #undef __FUNC__ 10305eea60f9SBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */ 1031d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1032c74985f6SBarry Smith { 103344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1034c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1035c74985f6SBarry Smith return 0; 1036c74985f6SBarry Smith } 1037c74985f6SBarry Smith 10385615d1e5SSatish Balay #undef __FUNC__ 10395eea60f9SBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */ 1040d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1041c74985f6SBarry Smith { 104244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1043b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1044c74985f6SBarry Smith return 0; 1045c74985f6SBarry Smith } 1046c74985f6SBarry Smith 10475615d1e5SSatish Balay #undef __FUNC__ 10485eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */ 1049d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1050c74985f6SBarry Smith { 105144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1052c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1053c74985f6SBarry Smith return 0; 1054c74985f6SBarry Smith } 1055c74985f6SBarry Smith 10566d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 10576d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 10586d84be18SBarry Smith 10595615d1e5SSatish Balay #undef __FUNC__ 10605615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 10616d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 106239e00950SLois Curfman McInnes { 1063154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 106470f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1065154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1066154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 106770f0671dSBarry Smith int *cmap, *idx_p; 106839e00950SLois Curfman McInnes 1069e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 10707a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 10717a0afa10SBarry Smith 107270f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 10737a0afa10SBarry Smith /* 10747a0afa10SBarry Smith allocate enough space to hold information from the longest row. 10757a0afa10SBarry Smith */ 10767a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1077c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1078c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 10797a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 10807a0afa10SBarry Smith if (max < tmp) { max = tmp; } 10817a0afa10SBarry Smith } 10827a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 10837a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 10847a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 10857a0afa10SBarry Smith } 10867a0afa10SBarry Smith 1087e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1088abc0e9e4SLois Curfman McInnes lrow = row - rstart; 108939e00950SLois Curfman McInnes 1090154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1091154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1092154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 109338597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 109438597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1095154123eaSLois Curfman McInnes nztot = nzA + nzB; 1096154123eaSLois Curfman McInnes 109770f0671dSBarry Smith cmap = mat->garray; 1098154123eaSLois Curfman McInnes if (v || idx) { 1099154123eaSLois Curfman McInnes if (nztot) { 1100154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 110170f0671dSBarry Smith int imark = -1; 1102154123eaSLois Curfman McInnes if (v) { 110370f0671dSBarry Smith *v = v_p = mat->rowvalues; 110439e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 110570f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1106154123eaSLois Curfman McInnes else break; 1107154123eaSLois Curfman McInnes } 1108154123eaSLois Curfman McInnes imark = i; 110970f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 111070f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1111154123eaSLois Curfman McInnes } 1112154123eaSLois Curfman McInnes if (idx) { 111370f0671dSBarry Smith *idx = idx_p = mat->rowindices; 111470f0671dSBarry Smith if (imark > -1) { 111570f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 111670f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 111770f0671dSBarry Smith } 111870f0671dSBarry Smith } else { 1119154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 112070f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1121154123eaSLois Curfman McInnes else break; 1122154123eaSLois Curfman McInnes } 1123154123eaSLois Curfman McInnes imark = i; 112470f0671dSBarry Smith } 112570f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 112670f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 112739e00950SLois Curfman McInnes } 112839e00950SLois Curfman McInnes } 11291ca473b0SSatish Balay else { 11301ca473b0SSatish Balay if (idx) *idx = 0; 11311ca473b0SSatish Balay if (v) *v = 0; 11321ca473b0SSatish Balay } 1133154123eaSLois Curfman McInnes } 113439e00950SLois Curfman McInnes *nz = nztot; 113538597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 113638597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 113739e00950SLois Curfman McInnes return 0; 113839e00950SLois Curfman McInnes } 113939e00950SLois Curfman McInnes 11405615d1e5SSatish Balay #undef __FUNC__ 11415eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */ 11426d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 114339e00950SLois Curfman McInnes { 11447a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 11457a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1146e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 11477a0afa10SBarry Smith } 11487a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 114939e00950SLois Curfman McInnes return 0; 115039e00950SLois Curfman McInnes } 115139e00950SLois Curfman McInnes 11525615d1e5SSatish Balay #undef __FUNC__ 11535615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 1154cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1155855ac2c5SLois Curfman McInnes { 1156855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1157ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1158416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1159416022c9SBarry Smith double sum = 0.0; 116004ca555eSLois Curfman McInnes Scalar *v; 116104ca555eSLois Curfman McInnes 116217699dbbSLois Curfman McInnes if (aij->size == 1) { 116314183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 116437fa93a5SLois Curfman McInnes } else { 116504ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 116604ca555eSLois Curfman McInnes v = amat->a; 116704ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 116804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116904ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 117004ca555eSLois Curfman McInnes #else 117104ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 117204ca555eSLois Curfman McInnes #endif 117304ca555eSLois Curfman McInnes } 117404ca555eSLois Curfman McInnes v = bmat->a; 117504ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 117604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 117704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 117804ca555eSLois Curfman McInnes #else 117904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 118004ca555eSLois Curfman McInnes #endif 118104ca555eSLois Curfman McInnes } 11826d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 118304ca555eSLois Curfman McInnes *norm = sqrt(*norm); 118404ca555eSLois Curfman McInnes } 118504ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 118604ca555eSLois Curfman McInnes double *tmp, *tmp2; 118704ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 11880452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 11890452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1190cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 119104ca555eSLois Curfman McInnes *norm = 0.0; 119204ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 119304ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1194579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 119504ca555eSLois Curfman McInnes } 119604ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 119704ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1198579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 119904ca555eSLois Curfman McInnes } 12006d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 120104ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 120204ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 120304ca555eSLois Curfman McInnes } 12040452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 120504ca555eSLois Curfman McInnes } 120604ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1207515d9167SLois Curfman McInnes double ntemp = 0.0; 120804ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1209dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 121004ca555eSLois Curfman McInnes sum = 0.0; 121104ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1212cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 121304ca555eSLois Curfman McInnes } 1214dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 121504ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1216cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 121704ca555eSLois Curfman McInnes } 1218515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 121904ca555eSLois Curfman McInnes } 12206d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 122104ca555eSLois Curfman McInnes } 122204ca555eSLois Curfman McInnes else { 1223e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 122404ca555eSLois Curfman McInnes } 122537fa93a5SLois Curfman McInnes } 1226855ac2c5SLois Curfman McInnes return 0; 1227855ac2c5SLois Curfman McInnes } 1228855ac2c5SLois Curfman McInnes 12295615d1e5SSatish Balay #undef __FUNC__ 12305615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 12310de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1232b7c46309SBarry Smith { 1233b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1234dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1235416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1236416022c9SBarry Smith Mat B; 1237b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1238b7c46309SBarry Smith Scalar *array; 1239b7c46309SBarry Smith 12403638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 1241e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 1242b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1243b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1244b7c46309SBarry Smith 1245b7c46309SBarry Smith /* copy over the A part */ 1246ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1247b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1248b7c46309SBarry Smith row = a->rstart; 1249dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1250b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1251416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1252b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1253b7c46309SBarry Smith } 1254b7c46309SBarry Smith aj = Aloc->j; 12554af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1256b7c46309SBarry Smith 1257b7c46309SBarry Smith /* copy over the B part */ 1258ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1259b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1260b7c46309SBarry Smith row = a->rstart; 12610452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1262dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1263b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1264416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1265b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1266b7c46309SBarry Smith } 12670452661fSBarry Smith PetscFree(ct); 12686d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12696d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12703638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 12710de55854SLois Curfman McInnes *matout = B; 12720de55854SLois Curfman McInnes } else { 12730de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12740452661fSBarry Smith PetscFree(a->rowners); 12750de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12760de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12770452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12780452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12790de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1280a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12810452661fSBarry Smith PetscFree(a); 1282416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12830452661fSBarry Smith PetscHeaderDestroy(B); 12840de55854SLois Curfman McInnes } 1285b7c46309SBarry Smith return 0; 1286b7c46309SBarry Smith } 1287b7c46309SBarry Smith 12885615d1e5SSatish Balay #undef __FUNC__ 12895615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 12904b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1291a008b906SSatish Balay { 12924b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 12934b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1294a008b906SSatish Balay int ierr,s1,s2,s3; 1295a008b906SSatish Balay 12964b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 12974b967eb1SSatish Balay if (rr) { 12984b967eb1SSatish Balay s3 = aij->n; 12994b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 1300e3372554SBarry Smith if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 13014b967eb1SSatish Balay /* Overlap communication with computation. */ 130243a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1303a008b906SSatish Balay } 13044b967eb1SSatish Balay if (ll) { 13054b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 1306e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1307c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 13084b967eb1SSatish Balay } 13094b967eb1SSatish Balay /* scale the diagonal block */ 1310c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 13114b967eb1SSatish Balay 13124b967eb1SSatish Balay if (rr) { 13134b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 131443a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1315c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 13164b967eb1SSatish Balay } 13174b967eb1SSatish Balay 1318a008b906SSatish Balay return 0; 1319a008b906SSatish Balay } 1320a008b906SSatish Balay 1321a008b906SSatish Balay 1322682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 13235615d1e5SSatish Balay #undef __FUNC__ 13245eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */ 1325682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1326682d7d0cSBarry Smith { 1327682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1328682d7d0cSBarry Smith 1329682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1330682d7d0cSBarry Smith else return 0; 1331682d7d0cSBarry Smith } 1332682d7d0cSBarry Smith 13335615d1e5SSatish Balay #undef __FUNC__ 13345eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */ 13355a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 13365a838052SSatish Balay { 13375a838052SSatish Balay *bs = 1; 13385a838052SSatish Balay return 0; 13395a838052SSatish Balay } 13405615d1e5SSatish Balay #undef __FUNC__ 13415eea60f9SBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */ 1342bb5a7306SBarry Smith static int MatSetUnfactored_MPIAIJ(Mat A) 1343bb5a7306SBarry Smith { 1344bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1345bb5a7306SBarry Smith int ierr; 1346bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1347bb5a7306SBarry Smith return 0; 1348bb5a7306SBarry Smith } 1349bb5a7306SBarry Smith 13503d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 13512f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 13520a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 13530a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 13548a729477SBarry Smith /* -------------------------------------------------------------------*/ 13552ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 135639e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 135744a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 135844a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 135936ce4990SBarry Smith 0,0, 136036ce4990SBarry Smith 0,0, 136136ce4990SBarry Smith 0,0, 136244a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1363b7c46309SBarry Smith MatTranspose_MPIAIJ, 1364a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1365a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1366ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 13671eb62cbbSBarry Smith 0, 1368299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 136936ce4990SBarry Smith 0,0,0,0, 1370d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 137136ce4990SBarry Smith 0,0, 137294a9d846SBarry Smith 0,0,MatConvertSameType_MPIAIJ,0,0, 1373b49de8d1SLois Curfman McInnes 0,0,0, 1374598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1375052efed2SBarry Smith MatPrintHelp_MPIAIJ, 13763b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 13770a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 1378bb5a7306SBarry Smith MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 137936ce4990SBarry Smith 13808a729477SBarry Smith 13815615d1e5SSatish Balay #undef __FUNC__ 13825615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 13831987afe7SBarry Smith /*@C 1384ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 13853a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 13863a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 13873a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 13883a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 13898a729477SBarry Smith 13908a729477SBarry Smith Input Parameters: 13911eb62cbbSBarry Smith . comm - MPI communicator 13927d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 139392e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 139492e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 139592e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 139692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 139792e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 13987d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 139992e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1400ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1401ff756334SLois Curfman McInnes (same for all local rows) 14022bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 140392e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 14042bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 14052bd5e0b2SLois Curfman McInnes it is zero. 14062bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1407ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 14082bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 14092bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 14102bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 14118a729477SBarry Smith 1412ff756334SLois Curfman McInnes Output Parameter: 141344cd7ae7SLois Curfman McInnes . A - the matrix 14148a729477SBarry Smith 1415ff756334SLois Curfman McInnes Notes: 1416ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1417ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 14180002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 14190002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1420ff756334SLois Curfman McInnes 1421ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1422ff756334SLois Curfman McInnes (possibly both). 1423ff756334SLois Curfman McInnes 14245511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 14255511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 14265511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 14275511cfe3SLois Curfman McInnes 14285511cfe3SLois Curfman McInnes Options Database Keys: 14295511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 14306e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 14316e62573dSLois Curfman McInnes $ (max limit=5) 14326e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 14336e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 14346e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 14355511cfe3SLois Curfman McInnes 1436e0245417SLois Curfman McInnes Storage Information: 1437e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1438e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1439e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1440e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1441e0245417SLois Curfman McInnes 1442e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 14435ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 14445ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 14455ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 14465ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1447ff756334SLois Curfman McInnes 14485511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 14495511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 14502191d07cSBarry Smith 1451b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1452b810aeb4SBarry Smith $ ------------------- 14535511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 14545511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 14555511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 14565511cfe3SLois Curfman McInnes $ ------------------- 1457b810aeb4SBarry Smith $ 1458b810aeb4SBarry Smith 14595511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 14605511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 14615511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 14625511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 14635511cfe3SLois Curfman McInnes 14645511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 14655511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 14665511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 14673d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 146892e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 14696da5968aSLois Curfman McInnes matrices. 14703a511b96SLois Curfman McInnes 1471dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1472ff756334SLois Curfman McInnes 1473fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 14748a729477SBarry Smith @*/ 14751eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 147644cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 14778a729477SBarry Smith { 147844cd7ae7SLois Curfman McInnes Mat B; 147944cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 148036ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1481416022c9SBarry Smith 148244cd7ae7SLois Curfman McInnes *A = 0; 148344cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 148444cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 148544cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 148644cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 148744cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 148836ce4990SBarry Smith MPI_Comm_size(comm,&size); 148936ce4990SBarry Smith if (size == 1) { 149036ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 149136ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 149236ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 149336ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 149436ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 149536ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 149636ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 149736ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 149836ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 149936ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 150036ce4990SBarry Smith } 150144cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 150244cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 150344cd7ae7SLois Curfman McInnes B->factor = 0; 150444cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 150590f02eecSBarry Smith B->mapping = 0; 1506d6dfbf8fSBarry Smith 1507*47794344SBarry Smith B->insertmode = NOT_SET_VALUES; 150844cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 150944cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 15101eb62cbbSBarry Smith 1511b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1512e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 15131987afe7SBarry Smith 1514dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 15151eb62cbbSBarry Smith work[0] = m; work[1] = n; 1516d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1517dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1518dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 15191eb62cbbSBarry Smith } 152044cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 152144cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 152244cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 152344cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 152444cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 152544cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 15261eb62cbbSBarry Smith 15271eb62cbbSBarry Smith /* build local table of row and column ownerships */ 152844cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 152944cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1530603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 153144cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 153244cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 153344cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 153444cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 15358a729477SBarry Smith } 153644cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 153744cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 153844cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 153944cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 154044cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 154144cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 15428a729477SBarry Smith } 154344cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 154444cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 15458a729477SBarry Smith 15465ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 154744cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 154844cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 15497b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 155044cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 155144cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 15528a729477SBarry Smith 15531eb62cbbSBarry Smith /* build cache for off array entries formed */ 155444cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 155590f02eecSBarry Smith b->donotstash = 0; 155644cd7ae7SLois Curfman McInnes b->colmap = 0; 155744cd7ae7SLois Curfman McInnes b->garray = 0; 155844cd7ae7SLois Curfman McInnes b->roworiented = 1; 15598a729477SBarry Smith 15601eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 156144cd7ae7SLois Curfman McInnes b->lvec = 0; 156244cd7ae7SLois Curfman McInnes b->Mvctx = 0; 15638a729477SBarry Smith 15647a0afa10SBarry Smith /* stuff for MatGetRow() */ 156544cd7ae7SLois Curfman McInnes b->rowindices = 0; 156644cd7ae7SLois Curfman McInnes b->rowvalues = 0; 156744cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 15687a0afa10SBarry Smith 156944cd7ae7SLois Curfman McInnes *A = B; 1570d6dfbf8fSBarry Smith return 0; 1571d6dfbf8fSBarry Smith } 1572c74985f6SBarry Smith 15735615d1e5SSatish Balay #undef __FUNC__ 15745615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ" 15753d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1576d6dfbf8fSBarry Smith { 1577d6dfbf8fSBarry Smith Mat mat; 1578416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1579a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1580d6dfbf8fSBarry Smith 1581416022c9SBarry Smith *newmat = 0; 15820452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1583a5a9c739SBarry Smith PLogObjectCreate(mat); 15840452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1585416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 158644a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 158744a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1588d6dfbf8fSBarry Smith mat->factor = matin->factor; 1589c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1590d6dfbf8fSBarry Smith 159144cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 159244cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 159344cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 159444cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1595d6dfbf8fSBarry Smith 1596416022c9SBarry Smith a->rstart = oldmat->rstart; 1597416022c9SBarry Smith a->rend = oldmat->rend; 1598416022c9SBarry Smith a->cstart = oldmat->cstart; 1599416022c9SBarry Smith a->cend = oldmat->cend; 160017699dbbSLois Curfman McInnes a->size = oldmat->size; 160117699dbbSLois Curfman McInnes a->rank = oldmat->rank; 1602*47794344SBarry Smith mat->insertmode = NOT_SET_VALUES; 1603bcd2baecSBarry Smith a->rowvalues = 0; 1604bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1605d6dfbf8fSBarry Smith 1606603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1607603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1608603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1609603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1610416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 16112ee70a88SLois Curfman McInnes if (oldmat->colmap) { 16120452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1613416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1614416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1615416022c9SBarry Smith } else a->colmap = 0; 16163f41c07dSBarry Smith if (oldmat->garray) { 16173f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 16183f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1619464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 16203f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1621416022c9SBarry Smith } else a->garray = 0; 1622d6dfbf8fSBarry Smith 1623416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1624416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1625a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1626416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1627416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1628416022c9SBarry Smith PLogObjectParent(mat,a->A); 1629416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1630416022c9SBarry Smith PLogObjectParent(mat,a->B); 16315dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 163225cdf11fSBarry Smith if (flg) { 1633682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1634682d7d0cSBarry Smith } 16358a729477SBarry Smith *newmat = mat; 16368a729477SBarry Smith return 0; 16378a729477SBarry Smith } 1638416022c9SBarry Smith 163977c4ece6SBarry Smith #include "sys.h" 1640416022c9SBarry Smith 16415615d1e5SSatish Balay #undef __FUNC__ 16425615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 164319bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1644416022c9SBarry Smith { 1645d65a2f8fSBarry Smith Mat A; 1646416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1647d65a2f8fSBarry Smith Scalar *vals,*svals; 164819bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1649416022c9SBarry Smith MPI_Status status; 165017699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1651d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 165219bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1653416022c9SBarry Smith 165417699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 165517699dbbSLois Curfman McInnes if (!rank) { 165690ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 165777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1658e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1659416022c9SBarry Smith } 1660416022c9SBarry Smith 1661416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1662416022c9SBarry Smith M = header[1]; N = header[2]; 1663416022c9SBarry Smith /* determine ownership of all rows */ 166417699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 16650452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1666416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1667416022c9SBarry Smith rowners[0] = 0; 166817699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1669416022c9SBarry Smith rowners[i] += rowners[i-1]; 1670416022c9SBarry Smith } 167117699dbbSLois Curfman McInnes rstart = rowners[rank]; 167217699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1673416022c9SBarry Smith 1674416022c9SBarry Smith /* distribute row lengths to all processors */ 16750452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1676416022c9SBarry Smith offlens = ourlens + (rend-rstart); 167717699dbbSLois Curfman McInnes if (!rank) { 16780452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 167977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 16800452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 168117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1682d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 16830452661fSBarry Smith PetscFree(sndcounts); 1684416022c9SBarry Smith } 1685416022c9SBarry Smith else { 1686416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1687416022c9SBarry Smith } 1688416022c9SBarry Smith 168917699dbbSLois Curfman McInnes if (!rank) { 1690416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 16910452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1692cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 169317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1694416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1695416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1696416022c9SBarry Smith } 1697416022c9SBarry Smith } 16980452661fSBarry Smith PetscFree(rowlengths); 1699416022c9SBarry Smith 1700416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1701416022c9SBarry Smith maxnz = 0; 170217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 17030452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1704416022c9SBarry Smith } 17050452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1706416022c9SBarry Smith 1707416022c9SBarry Smith /* read in my part of the matrix column indices */ 1708416022c9SBarry Smith nz = procsnz[0]; 17090452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 171077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1711d65a2f8fSBarry Smith 1712d65a2f8fSBarry Smith /* read in every one elses and ship off */ 171317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1714d65a2f8fSBarry Smith nz = procsnz[i]; 171577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1716d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1717d65a2f8fSBarry Smith } 17180452661fSBarry Smith PetscFree(cols); 1719416022c9SBarry Smith } 1720416022c9SBarry Smith else { 1721416022c9SBarry Smith /* determine buffer space needed for message */ 1722416022c9SBarry Smith nz = 0; 1723416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1724416022c9SBarry Smith nz += ourlens[i]; 1725416022c9SBarry Smith } 17260452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1727416022c9SBarry Smith 1728416022c9SBarry Smith /* receive message of column indices*/ 1729d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1730416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1731e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1732416022c9SBarry Smith } 1733416022c9SBarry Smith 1734416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1735cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1736416022c9SBarry Smith jj = 0; 1737416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1738416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1739d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1740416022c9SBarry Smith jj++; 1741416022c9SBarry Smith } 1742416022c9SBarry Smith } 1743d65a2f8fSBarry Smith 1744d65a2f8fSBarry Smith /* create our matrix */ 1745416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1746416022c9SBarry Smith ourlens[i] -= offlens[i]; 1747416022c9SBarry Smith } 1748d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1749d65a2f8fSBarry Smith A = *newmat; 17506d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1751d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1752d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1753d65a2f8fSBarry Smith } 1754416022c9SBarry Smith 175517699dbbSLois Curfman McInnes if (!rank) { 17560452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1757416022c9SBarry Smith 1758416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1759416022c9SBarry Smith nz = procsnz[0]; 176077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1761d65a2f8fSBarry Smith 1762d65a2f8fSBarry Smith /* insert into matrix */ 1763d65a2f8fSBarry Smith jj = rstart; 1764d65a2f8fSBarry Smith smycols = mycols; 1765d65a2f8fSBarry Smith svals = vals; 1766d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1767d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1768d65a2f8fSBarry Smith smycols += ourlens[i]; 1769d65a2f8fSBarry Smith svals += ourlens[i]; 1770d65a2f8fSBarry Smith jj++; 1771416022c9SBarry Smith } 1772416022c9SBarry Smith 1773d65a2f8fSBarry Smith /* read in other processors and ship out */ 177417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1775416022c9SBarry Smith nz = procsnz[i]; 177677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1777d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1778416022c9SBarry Smith } 17790452661fSBarry Smith PetscFree(procsnz); 1780416022c9SBarry Smith } 1781d65a2f8fSBarry Smith else { 1782d65a2f8fSBarry Smith /* receive numeric values */ 17830452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1784416022c9SBarry Smith 1785d65a2f8fSBarry Smith /* receive message of values*/ 1786d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1787d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1788e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1789d65a2f8fSBarry Smith 1790d65a2f8fSBarry Smith /* insert into matrix */ 1791d65a2f8fSBarry Smith jj = rstart; 1792d65a2f8fSBarry Smith smycols = mycols; 1793d65a2f8fSBarry Smith svals = vals; 1794d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1795d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1796d65a2f8fSBarry Smith smycols += ourlens[i]; 1797d65a2f8fSBarry Smith svals += ourlens[i]; 1798d65a2f8fSBarry Smith jj++; 1799d65a2f8fSBarry Smith } 1800d65a2f8fSBarry Smith } 18010452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1802d65a2f8fSBarry Smith 18036d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 18046d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1805416022c9SBarry Smith return 0; 1806416022c9SBarry Smith } 1807