1cb512458SBarry Smith #ifndef lint 2*f40265daSLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.189 1997/01/27 18:16:42 bsmith Exp curfman $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 7d9942c19SSatish Balay #include "src/inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 145615d1e5SSatish Balay #undef __FUNC__ 155615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIAIJ_Private" 160a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 179e25ed09SBarry Smith { 1844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20905e6a2fSBarry Smith int n = B->n,i; 21dbb450caSBarry Smith 220452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 23464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 24cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 25905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 269e25ed09SBarry Smith return 0; 279e25ed09SBarry Smith } 289e25ed09SBarry Smith 292493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 302493cbb0SBarry Smith 315615d1e5SSatish Balay #undef __FUNC__ 325615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ" 333b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 343b2fbd54SBarry Smith PetscTruth *done) 35299609e3SLois Curfman McInnes { 36299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 37299609e3SLois Curfman McInnes int ierr; 3817699dbbSLois Curfman McInnes if (aij->size == 1) { 393b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 40e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 413b2fbd54SBarry Smith return 0; 423b2fbd54SBarry Smith } 433b2fbd54SBarry Smith 445615d1e5SSatish Balay #undef __FUNC__ 455615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" 463b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 473b2fbd54SBarry Smith PetscTruth *done) 483b2fbd54SBarry Smith { 493b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 503b2fbd54SBarry Smith int ierr; 513b2fbd54SBarry Smith if (aij->size == 1) { 523b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 53e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 54299609e3SLois Curfman McInnes return 0; 55299609e3SLois Curfman McInnes } 56299609e3SLois Curfman McInnes 570520107fSSatish Balay #define CHUNKSIZE 15 58f5e9677aSSatish Balay #define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \ 590520107fSSatish Balay { \ 600520107fSSatish Balay \ 610520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 620520107fSSatish Balay rmax = imax[row]; nrow = ailen[row]; \ 63f5e9677aSSatish Balay col1 = col - shift; \ 64f5e9677aSSatish Balay \ 650520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 66f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 67f5e9677aSSatish Balay if (rp[_i] == col1) { \ 680520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 690520107fSSatish Balay else ap[_i] = value; \ 700520107fSSatish Balay goto _noinsert; \ 710520107fSSatish Balay } \ 720520107fSSatish Balay } \ 73f5e9677aSSatish Balay if (nonew) goto _noinsert; \ 740520107fSSatish Balay if (nrow >= rmax) { \ 750520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 760520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 770520107fSSatish Balay Scalar *new_a; \ 780520107fSSatish Balay \ 790520107fSSatish Balay /* malloc new storage space */ \ 800520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 810520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 820520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 830520107fSSatish Balay new_i = new_j + new_nz; \ 840520107fSSatish Balay \ 850520107fSSatish Balay /* copy over old data into new slots */ \ 860520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 870520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 880520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 890520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 900520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 910520107fSSatish Balay len*sizeof(int)); \ 920520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 930520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 940520107fSSatish Balay len*sizeof(Scalar)); \ 950520107fSSatish Balay /* free up old matrix storage */ \ 96f5e9677aSSatish Balay \ 970520107fSSatish Balay PetscFree(a->a); \ 980520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 990520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1000520107fSSatish Balay a->singlemalloc = 1; \ 1010520107fSSatish Balay \ 1020520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 1030520107fSSatish Balay rmax = imax[row] = imax[row] + CHUNKSIZE; \ 1040520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 1050520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 1060520107fSSatish Balay a->reallocs++; \ 1070520107fSSatish Balay } \ 1080520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 1090520107fSSatish Balay /* shift up all the later entries in this row */ \ 1100520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 1110520107fSSatish Balay rp[ii+1] = rp[ii]; \ 1120520107fSSatish Balay ap[ii+1] = ap[ii]; \ 1130520107fSSatish Balay } \ 114f5e9677aSSatish Balay rp[_i] = col1; \ 1150520107fSSatish Balay ap[_i] = value; \ 1160520107fSSatish Balay _noinsert: ; \ 1170520107fSSatish Balay ailen[row] = nrow; \ 1180520107fSSatish Balay } 1190a198c4cSBarry Smith 1200520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1215615d1e5SSatish Balay #undef __FUNC__ 1225615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 1234b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 1248a729477SBarry Smith { 12544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1264b0e389bSBarry Smith Scalar value; 1271eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 1281eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 129905e6a2fSBarry Smith int roworiented = aij->roworiented; 1308a729477SBarry Smith 1310520107fSSatish Balay /* Some Variables required in the macro */ 1324ee7247eSSatish Balay Mat A = aij->A; 1334ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 1344ee7247eSSatish Balay int *rp,ii,nrow,_i,rmax, N, col1; 1350520107fSSatish Balay int *imax = a->imax, *ai = a->i, *ailen = a->ilen; 1364ee7247eSSatish Balay int *aj = a->j, nonew = a->nonew,shift = a->indexshift; 1370520107fSSatish Balay Scalar *ap, *aa = a->a; 1384ee7247eSSatish Balay 1390a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 14064119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 141e3372554SBarry Smith SETERRQ(1,0,"Cannot mix inserts and adds"); 1428a729477SBarry Smith } 1430a198c4cSBarry Smith #endif 1441eb62cbbSBarry Smith aij->insertmode = addv; 1458a729477SBarry Smith for ( i=0; i<m; i++ ) { 1460a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 147e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 148e3372554SBarry Smith if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 1490a198c4cSBarry Smith #endif 1504b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 1514b0e389bSBarry Smith row = im[i] - rstart; 1521eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 1534b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 1544b0e389bSBarry Smith col = in[j] - cstart; 1554b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 156f5e9677aSSatish Balay MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv); 1570520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 1581eb62cbbSBarry Smith } 1590a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 160e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 161e3372554SBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 1620a198c4cSBarry Smith #endif 1631eb62cbbSBarry Smith else { 164227d817aSBarry Smith if (mat->was_assembled) { 165905e6a2fSBarry Smith if (!aij->colmap) { 166905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 167905e6a2fSBarry Smith } 168905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 169ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 1702493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 1714b0e389bSBarry Smith col = in[j]; 172d6dfbf8fSBarry Smith } 1739e25ed09SBarry Smith } 1744b0e389bSBarry Smith else col = in[j]; 1754b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 1760a198c4cSBarry Smith ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 1771eb62cbbSBarry Smith } 1781eb62cbbSBarry Smith } 1791eb62cbbSBarry Smith } 1801eb62cbbSBarry Smith else { 18190f02eecSBarry Smith if (roworiented && !aij->donotstash) { 1824b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 1834b0e389bSBarry Smith } 1844b0e389bSBarry Smith else { 18590f02eecSBarry Smith if (!aij->donotstash) { 1864b0e389bSBarry Smith row = im[i]; 1874b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 1884b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 1894b0e389bSBarry Smith } 1904b0e389bSBarry Smith } 1911eb62cbbSBarry Smith } 1928a729477SBarry Smith } 19390f02eecSBarry Smith } 1948a729477SBarry Smith return 0; 1958a729477SBarry Smith } 1968a729477SBarry Smith 1975615d1e5SSatish Balay #undef __FUNC__ 1985615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 199b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 200b49de8d1SLois Curfman McInnes { 201b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 202b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 203b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 204b49de8d1SLois Curfman McInnes 205b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 206e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 207e3372554SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 208b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 209b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 210b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 211e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 212e3372554SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 213b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 214b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 215b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 216b49de8d1SLois Curfman McInnes } 217b49de8d1SLois Curfman McInnes else { 218905e6a2fSBarry Smith if (!aij->colmap) { 219905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 220905e6a2fSBarry Smith } 221905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 222e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 223d9d09a02SSatish Balay else { 224b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 225b49de8d1SLois Curfman McInnes } 226b49de8d1SLois Curfman McInnes } 227b49de8d1SLois Curfman McInnes } 228d9d09a02SSatish Balay } 229b49de8d1SLois Curfman McInnes else { 230e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 231b49de8d1SLois Curfman McInnes } 232b49de8d1SLois Curfman McInnes } 233b49de8d1SLois Curfman McInnes return 0; 234b49de8d1SLois Curfman McInnes } 235b49de8d1SLois Curfman McInnes 2365615d1e5SSatish Balay #undef __FUNC__ 2375615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 238fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 2398a729477SBarry Smith { 24044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 241d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 24217699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 24317699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 2441eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2456abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 2461eb62cbbSBarry Smith InsertMode addv; 2471eb62cbbSBarry Smith Scalar *rvalues,*svalues; 2481eb62cbbSBarry Smith 2491eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 250d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 251dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 252e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 2531eb62cbbSBarry Smith } 2541eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 2551eb62cbbSBarry Smith 2561eb62cbbSBarry Smith /* first count number of contributors to each processor */ 2570452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 258cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2590452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 2601eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2611eb62cbbSBarry Smith idx = aij->stash.idx[i]; 26217699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 2631eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 2641eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 2658a729477SBarry Smith } 2668a729477SBarry Smith } 2678a729477SBarry Smith } 26817699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2691eb62cbbSBarry Smith 2701eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 2710452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 27217699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 27317699dbbSLois Curfman McInnes nreceives = work[rank]; 27417699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 27517699dbbSLois Curfman McInnes nmax = work[rank]; 2760452661fSBarry Smith PetscFree(work); 2771eb62cbbSBarry Smith 2781eb62cbbSBarry Smith /* post receives: 2791eb62cbbSBarry Smith 1) each message will consist of ordered pairs 2801eb62cbbSBarry Smith (global index,value) we store the global index as a double 281d6dfbf8fSBarry Smith to simplify the message passing. 2821eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 2831eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 2841eb62cbbSBarry Smith this is a lot of wasted space. 2851eb62cbbSBarry Smith 2861eb62cbbSBarry Smith 2871eb62cbbSBarry Smith This could be done better. 2881eb62cbbSBarry Smith */ 2890452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 29078b31e54SBarry Smith CHKPTRQ(rvalues); 2910452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 29278b31e54SBarry Smith CHKPTRQ(recv_waits); 2931eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 294416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 2951eb62cbbSBarry Smith comm,recv_waits+i); 2961eb62cbbSBarry Smith } 2971eb62cbbSBarry Smith 2981eb62cbbSBarry Smith /* do sends: 2991eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3001eb62cbbSBarry Smith the ith processor 3011eb62cbbSBarry Smith */ 3020452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 3030452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 30478b31e54SBarry Smith CHKPTRQ(send_waits); 3050452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3061eb62cbbSBarry Smith starts[0] = 0; 30717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3081eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3091eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 3101eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 3111eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 3121eb62cbbSBarry Smith } 3130452661fSBarry Smith PetscFree(owner); 3141eb62cbbSBarry Smith starts[0] = 0; 31517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3161eb62cbbSBarry Smith count = 0; 31717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3181eb62cbbSBarry Smith if (procs[i]) { 319416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 3201eb62cbbSBarry Smith comm,send_waits+count++); 3211eb62cbbSBarry Smith } 3221eb62cbbSBarry Smith } 3230452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 3241eb62cbbSBarry Smith 3251eb62cbbSBarry Smith /* Free cache space */ 32655908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 32778b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 3281eb62cbbSBarry Smith 3291eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 3301eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 3311eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 3321eb62cbbSBarry Smith aij->rmax = nmax; 3331eb62cbbSBarry Smith 3341eb62cbbSBarry Smith return 0; 3351eb62cbbSBarry Smith } 33644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 3371eb62cbbSBarry Smith 3385615d1e5SSatish Balay #undef __FUNC__ 3395615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 340fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 3411eb62cbbSBarry Smith { 34244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 3431eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 344416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 345905e6a2fSBarry Smith int row,col,other_disassembled; 3461eb62cbbSBarry Smith Scalar *values,val; 3471eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 3481eb62cbbSBarry Smith 3491eb62cbbSBarry Smith /* wait on receives */ 3501eb62cbbSBarry Smith while (count) { 351d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 3521eb62cbbSBarry Smith /* unpack receives into our local space */ 353d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 354c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 3551eb62cbbSBarry Smith n = n/3; 3561eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 357227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 358227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 3591eb62cbbSBarry Smith val = values[3*i+2]; 3601eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 3611eb62cbbSBarry Smith col -= aij->cstart; 3621eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 3631eb62cbbSBarry Smith } 3641eb62cbbSBarry Smith else { 36555a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 366905e6a2fSBarry Smith if (!aij->colmap) { 367905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 368905e6a2fSBarry Smith } 369905e6a2fSBarry Smith col = aij->colmap[col] - 1; 370ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 3712493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 372227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 373d6dfbf8fSBarry Smith } 3749e25ed09SBarry Smith } 3751eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 3761eb62cbbSBarry Smith } 3771eb62cbbSBarry Smith } 3781eb62cbbSBarry Smith count--; 3791eb62cbbSBarry Smith } 3800452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 3811eb62cbbSBarry Smith 3821eb62cbbSBarry Smith /* wait on sends */ 3831eb62cbbSBarry Smith if (aij->nsends) { 3840a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 3851eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 3860452661fSBarry Smith PetscFree(send_status); 3871eb62cbbSBarry Smith } 3880452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 3891eb62cbbSBarry Smith 39064119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 39178b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 39278b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 3931eb62cbbSBarry Smith 3942493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 3952493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 396227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 397227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 3982493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 3992493cbb0SBarry Smith } 4002493cbb0SBarry Smith 4016d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 40278b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 4035e42470aSBarry Smith } 40478b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 40578b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 4065e42470aSBarry Smith 4077a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 4088a729477SBarry Smith return 0; 4098a729477SBarry Smith } 4108a729477SBarry Smith 4115615d1e5SSatish Balay #undef __FUNC__ 4125615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 4132ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 4141eb62cbbSBarry Smith { 41544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 416dbd7a890SLois Curfman McInnes int ierr; 41778b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 41878b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 4191eb62cbbSBarry Smith return 0; 4201eb62cbbSBarry Smith } 4211eb62cbbSBarry Smith 4221eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 4231eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 4241eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 4251eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 4261eb62cbbSBarry Smith routine. 4271eb62cbbSBarry Smith */ 4285615d1e5SSatish Balay #undef __FUNC__ 4295615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 43044a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 4311eb62cbbSBarry Smith { 43244a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 43317699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 4346abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 43517699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 4365392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 437d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 438d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 4391eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 4401eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 4411eb62cbbSBarry Smith IS istmp; 4421eb62cbbSBarry Smith 44377c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 44478b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 4451eb62cbbSBarry Smith 4461eb62cbbSBarry Smith /* first count number of contributors to each processor */ 4470452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 448cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 4490452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 4501eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4511eb62cbbSBarry Smith idx = rows[i]; 4521eb62cbbSBarry Smith found = 0; 45317699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 4541eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 4551eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 4561eb62cbbSBarry Smith } 4571eb62cbbSBarry Smith } 458e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 4591eb62cbbSBarry Smith } 46017699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 4611eb62cbbSBarry Smith 4621eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 4630452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 46417699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 46517699dbbSLois Curfman McInnes nrecvs = work[rank]; 46617699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 46717699dbbSLois Curfman McInnes nmax = work[rank]; 4680452661fSBarry Smith PetscFree(work); 4691eb62cbbSBarry Smith 4701eb62cbbSBarry Smith /* post receives: */ 4710452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 47278b31e54SBarry Smith CHKPTRQ(rvalues); 4730452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 47478b31e54SBarry Smith CHKPTRQ(recv_waits); 4751eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 476416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 4771eb62cbbSBarry Smith } 4781eb62cbbSBarry Smith 4791eb62cbbSBarry Smith /* do sends: 4801eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 4811eb62cbbSBarry Smith the ith processor 4821eb62cbbSBarry Smith */ 4830452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 4840452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 48578b31e54SBarry Smith CHKPTRQ(send_waits); 4860452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 4871eb62cbbSBarry Smith starts[0] = 0; 48817699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4891eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4901eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 4911eb62cbbSBarry Smith } 4921eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 4931eb62cbbSBarry Smith 4941eb62cbbSBarry Smith starts[0] = 0; 49517699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4961eb62cbbSBarry Smith count = 0; 49717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 4981eb62cbbSBarry Smith if (procs[i]) { 499416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 5001eb62cbbSBarry Smith } 5011eb62cbbSBarry Smith } 5020452661fSBarry Smith PetscFree(starts); 5031eb62cbbSBarry Smith 50417699dbbSLois Curfman McInnes base = owners[rank]; 5051eb62cbbSBarry Smith 5061eb62cbbSBarry Smith /* wait on receives */ 5070452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 5081eb62cbbSBarry Smith source = lens + nrecvs; 5091eb62cbbSBarry Smith count = nrecvs; slen = 0; 5101eb62cbbSBarry Smith while (count) { 511d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 5121eb62cbbSBarry Smith /* unpack receives into our local space */ 5131eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 514d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 515d6dfbf8fSBarry Smith lens[imdex] = n; 5161eb62cbbSBarry Smith slen += n; 5171eb62cbbSBarry Smith count--; 5181eb62cbbSBarry Smith } 5190452661fSBarry Smith PetscFree(recv_waits); 5201eb62cbbSBarry Smith 5211eb62cbbSBarry Smith /* move the data into the send scatter */ 5220452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 5231eb62cbbSBarry Smith count = 0; 5241eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 5251eb62cbbSBarry Smith values = rvalues + i*nmax; 5261eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 5271eb62cbbSBarry Smith lrows[count++] = values[j] - base; 5281eb62cbbSBarry Smith } 5291eb62cbbSBarry Smith } 5300452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 5310452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 5321eb62cbbSBarry Smith 5331eb62cbbSBarry Smith /* actually zap the local rows */ 534537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 535464493b3SBarry Smith PLogObjectParent(A,istmp); 5360452661fSBarry Smith PetscFree(lrows); 53778b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 53878b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 53978b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 5401eb62cbbSBarry Smith 5411eb62cbbSBarry Smith /* wait on sends */ 5421eb62cbbSBarry Smith if (nsends) { 5430452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 54478b31e54SBarry Smith CHKPTRQ(send_status); 5451eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 5460452661fSBarry Smith PetscFree(send_status); 5471eb62cbbSBarry Smith } 5480452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 5491eb62cbbSBarry Smith 5501eb62cbbSBarry Smith return 0; 5511eb62cbbSBarry Smith } 5521eb62cbbSBarry Smith 5535615d1e5SSatish Balay #undef __FUNC__ 5545615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 555416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 5561eb62cbbSBarry Smith { 557416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 558fbd6ef76SBarry Smith int ierr,nt; 559416022c9SBarry Smith 560a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 561fbd6ef76SBarry Smith if (nt != a->n) { 562*f40265daSLois Curfman McInnes SETERRQ(1,0,"Incompatible partition of A and xx"); 563fbd6ef76SBarry Smith } 56443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 56538597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 56643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 56738597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 5681eb62cbbSBarry Smith return 0; 5691eb62cbbSBarry Smith } 5701eb62cbbSBarry Smith 5715615d1e5SSatish Balay #undef __FUNC__ 5725615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 573416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 574da3a660dSBarry Smith { 575416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 576da3a660dSBarry Smith int ierr; 57743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 57838597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 57943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 58038597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 581da3a660dSBarry Smith return 0; 582da3a660dSBarry Smith } 583da3a660dSBarry Smith 5845615d1e5SSatish Balay #undef __FUNC__ 5855615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 586416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 587da3a660dSBarry Smith { 588416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 589da3a660dSBarry Smith int ierr; 590da3a660dSBarry Smith 591da3a660dSBarry Smith /* do nondiagonal part */ 59238597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 593da3a660dSBarry Smith /* send it on its way */ 594537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 595da3a660dSBarry Smith /* do local part */ 59638597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 597da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 598da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 599da3a660dSBarry Smith /* but is not perhaps always true. */ 600537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 601da3a660dSBarry Smith return 0; 602da3a660dSBarry Smith } 603da3a660dSBarry Smith 6045615d1e5SSatish Balay #undef __FUNC__ 6055615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 606416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 607da3a660dSBarry Smith { 608416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 609da3a660dSBarry Smith int ierr; 610da3a660dSBarry Smith 611da3a660dSBarry Smith /* do nondiagonal part */ 61238597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 613da3a660dSBarry Smith /* send it on its way */ 614537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 615da3a660dSBarry Smith /* do local part */ 61638597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 617da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 618da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 619da3a660dSBarry Smith /* but is not perhaps always true. */ 6200a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 621da3a660dSBarry Smith return 0; 622da3a660dSBarry Smith } 623da3a660dSBarry Smith 6241eb62cbbSBarry Smith /* 6251eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 6261eb62cbbSBarry Smith diagonal block 6271eb62cbbSBarry Smith */ 6285615d1e5SSatish Balay #undef __FUNC__ 6295615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 630416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 6311eb62cbbSBarry Smith { 632416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 63344cd7ae7SLois Curfman McInnes if (a->M != a->N) 634e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 6355baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 636e3372554SBarry Smith SETERRQ(1,0,"row partition must equal col partition"); } 637416022c9SBarry Smith return MatGetDiagonal(a->A,v); 6381eb62cbbSBarry Smith } 6391eb62cbbSBarry Smith 6405615d1e5SSatish Balay #undef __FUNC__ 6415615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 642052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 643052efed2SBarry Smith { 644052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 645052efed2SBarry Smith int ierr; 646052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 647052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 648052efed2SBarry Smith return 0; 649052efed2SBarry Smith } 650052efed2SBarry Smith 6515615d1e5SSatish Balay #undef __FUNC__ 6525615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIAIJ" 65344a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 6541eb62cbbSBarry Smith { 6551eb62cbbSBarry Smith Mat mat = (Mat) obj; 65644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 6571eb62cbbSBarry Smith int ierr; 658a5a9c739SBarry Smith #if defined(PETSC_LOG) 6596e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 660a5a9c739SBarry Smith #endif 6610452661fSBarry Smith PetscFree(aij->rowners); 66278b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 66378b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 6640452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 6650452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 6661eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 667a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 6687a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 6690452661fSBarry Smith PetscFree(aij); 67090f02eecSBarry Smith if (mat->mapping) { 67190f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 67290f02eecSBarry Smith } 673a5a9c739SBarry Smith PLogObjectDestroy(mat); 6740452661fSBarry Smith PetscHeaderDestroy(mat); 6751eb62cbbSBarry Smith return 0; 6761eb62cbbSBarry Smith } 6777857610eSBarry Smith #include "draw.h" 678b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 679ee50ffe9SBarry Smith 6805615d1e5SSatish Balay #undef __FUNC__ 6815615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ_Binary" 682416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 6831eb62cbbSBarry Smith { 684416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 685416022c9SBarry Smith int ierr; 686416022c9SBarry Smith 68717699dbbSLois Curfman McInnes if (aij->size == 1) { 688416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 689416022c9SBarry Smith } 690e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 691416022c9SBarry Smith return 0; 692416022c9SBarry Smith } 693416022c9SBarry Smith 6945615d1e5SSatish Balay #undef __FUNC__ 6955615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 696d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 697416022c9SBarry Smith { 69844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 699dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 700a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 701d636dbe3SBarry Smith FILE *fd; 70219bcc07fSBarry Smith ViewerType vtype; 703416022c9SBarry Smith 70419bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 70519bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 70690ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 7070a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7084e220ebcSLois Curfman McInnes MatInfo info; 7094e220ebcSLois Curfman McInnes int flg; 710a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 71190ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7124e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 71395e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 71477c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 71595e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 7164e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 71795e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 7184e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 7194e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 7204e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 7214e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 7224e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 723a56f8943SBarry Smith fflush(fd); 72477c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 725a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 726416022c9SBarry Smith return 0; 727416022c9SBarry Smith } 7280a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 72908480c60SBarry Smith return 0; 73008480c60SBarry Smith } 731416022c9SBarry Smith } 732416022c9SBarry Smith 73319bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 73419bcc07fSBarry Smith Draw draw; 73519bcc07fSBarry Smith PetscTruth isnull; 73619bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 73719bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 73819bcc07fSBarry Smith } 73919bcc07fSBarry Smith 74019bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 74190ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 74277c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 743d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 74417699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 7451eb62cbbSBarry Smith aij->cend); 74678b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 74778b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 748d13ab20cSBarry Smith fflush(fd); 74977c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 750d13ab20cSBarry Smith } 751416022c9SBarry Smith else { 752a56f8943SBarry Smith int size = aij->size; 753a56f8943SBarry Smith rank = aij->rank; 75417699dbbSLois Curfman McInnes if (size == 1) { 75578b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 75695373324SBarry Smith } 75795373324SBarry Smith else { 75895373324SBarry Smith /* assemble the entire matrix onto first processor. */ 75995373324SBarry Smith Mat A; 760ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 7612eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 76295373324SBarry Smith Scalar *a; 7632ee70a88SLois Curfman McInnes 76417699dbbSLois Curfman McInnes if (!rank) { 765b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 766c451ab25SLois Curfman McInnes CHKERRQ(ierr); 76795373324SBarry Smith } 76895373324SBarry Smith else { 769b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 770c451ab25SLois Curfman McInnes CHKERRQ(ierr); 77195373324SBarry Smith } 772464493b3SBarry Smith PLogObjectParent(mat,A); 773416022c9SBarry Smith 77495373324SBarry Smith /* copy over the A part */ 775ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 7762ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 77795373324SBarry Smith row = aij->rstart; 778dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 77995373324SBarry Smith for ( i=0; i<m; i++ ) { 780416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 78195373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 78295373324SBarry Smith } 7832ee70a88SLois Curfman McInnes aj = Aloc->j; 784dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 78595373324SBarry Smith 78695373324SBarry Smith /* copy over the B part */ 787ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 7882ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 78995373324SBarry Smith row = aij->rstart; 7900452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 791dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 79295373324SBarry Smith for ( i=0; i<m; i++ ) { 793416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 79495373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 79595373324SBarry Smith } 7960452661fSBarry Smith PetscFree(ct); 7976d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7986d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 79917699dbbSLois Curfman McInnes if (!rank) { 80078b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 80195373324SBarry Smith } 80278b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 80395373324SBarry Smith } 80495373324SBarry Smith } 8051eb62cbbSBarry Smith return 0; 8061eb62cbbSBarry Smith } 8071eb62cbbSBarry Smith 8085615d1e5SSatish Balay #undef __FUNC__ 8095615d1e5SSatish Balay #define __FUNC__ "MatView_MPIAIJ" 810416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 811416022c9SBarry Smith { 812416022c9SBarry Smith Mat mat = (Mat) obj; 813416022c9SBarry Smith int ierr; 81419bcc07fSBarry Smith ViewerType vtype; 815416022c9SBarry Smith 81619bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 81719bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 81819bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 819d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 820416022c9SBarry Smith } 82119bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 822416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 823416022c9SBarry Smith } 824416022c9SBarry Smith return 0; 825416022c9SBarry Smith } 826416022c9SBarry Smith 8271eb62cbbSBarry Smith /* 8281eb62cbbSBarry Smith This has to provide several versions. 8291eb62cbbSBarry Smith 8301eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 8311eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 832d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 8331eb62cbbSBarry Smith */ 8345615d1e5SSatish Balay #undef __FUNC__ 8355615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 836fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 837dbb450caSBarry Smith double fshift,int its,Vec xx) 8388a729477SBarry Smith { 83944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 840d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 841ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 842c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 8436abc6512SBarry Smith int ierr,*idx, *diag; 844416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 8458a729477SBarry Smith 846d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 847dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 848dbb450caSBarry Smith ls = ls + shift; 849ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 850d6dfbf8fSBarry Smith diag = A->diag; 851c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 852da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 85338597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 854da3a660dSBarry Smith } 85543a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 85678b31e54SBarry Smith CHKERRQ(ierr); 85743a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 85878b31e54SBarry Smith CHKERRQ(ierr); 859d6dfbf8fSBarry Smith while (its--) { 860d6dfbf8fSBarry Smith /* go down through the rows */ 861d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 862d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 863dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 864dbb450caSBarry Smith v = A->a + A->i[i] + shift; 865d6dfbf8fSBarry Smith sum = b[i]; 866d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 867dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 868d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 869dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 870dbb450caSBarry Smith v = B->a + B->i[i] + shift; 871d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 87255a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 873d6dfbf8fSBarry Smith } 874d6dfbf8fSBarry Smith /* come up through the rows */ 875d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 876d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 877dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 878dbb450caSBarry Smith v = A->a + A->i[i] + shift; 879d6dfbf8fSBarry Smith sum = b[i]; 880d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 881dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 882d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 883dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 884dbb450caSBarry Smith v = B->a + B->i[i] + shift; 885d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 88655a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 887d6dfbf8fSBarry Smith } 888d6dfbf8fSBarry Smith } 889d6dfbf8fSBarry Smith } 890d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 891da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 89238597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 893da3a660dSBarry Smith } 89443a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 89578b31e54SBarry Smith CHKERRQ(ierr); 89643a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 89778b31e54SBarry Smith CHKERRQ(ierr); 898d6dfbf8fSBarry Smith while (its--) { 899d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 900d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 901dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 902dbb450caSBarry Smith v = A->a + A->i[i] + shift; 903d6dfbf8fSBarry Smith sum = b[i]; 904d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 905dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 906d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 907dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 908dbb450caSBarry Smith v = B->a + B->i[i] + shift; 909d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 91055a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 911d6dfbf8fSBarry Smith } 912d6dfbf8fSBarry Smith } 913d6dfbf8fSBarry Smith } 914d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 915da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 91638597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 917da3a660dSBarry Smith } 91843a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 91978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 92043a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 92178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 922d6dfbf8fSBarry Smith while (its--) { 923d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 924d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 925dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 926dbb450caSBarry Smith v = A->a + A->i[i] + shift; 927d6dfbf8fSBarry Smith sum = b[i]; 928d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 929dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 930d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 931dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 932dbb450caSBarry Smith v = B->a + B->i[i] + shift; 933d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 93455a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 935d6dfbf8fSBarry Smith } 936d6dfbf8fSBarry Smith } 937d6dfbf8fSBarry Smith } 938c16cb8f2SBarry Smith else { 939e3372554SBarry Smith SETERRQ(1,0,"Option not supported"); 940c16cb8f2SBarry Smith } 9418a729477SBarry Smith return 0; 9428a729477SBarry Smith } 943a66be287SLois Curfman McInnes 9445615d1e5SSatish Balay #undef __FUNC__ 9455615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIAIJ" 9464e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 947a66be287SLois Curfman McInnes { 948a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 949a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 9504e220ebcSLois Curfman McInnes int ierr; 9514e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 952a66be287SLois Curfman McInnes 9534e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 9544e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 9554e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 9564e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 9574e220ebcSLois Curfman McInnes info->block_size = 1.0; 9584e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 9594e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 9604e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 9614e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 9624e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 9634e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 964a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 9654e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 9664e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 9674e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 9684e220ebcSLois Curfman McInnes info->memory = isend[3]; 9694e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 970a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 9714e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 9724e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9734e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9744e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9754e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9764e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 977a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 9784e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 9794e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9804e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9814e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9824e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9834e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 984a66be287SLois Curfman McInnes } 9854e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 9864e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 9874e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 9884e220ebcSLois Curfman McInnes 989a66be287SLois Curfman McInnes return 0; 990a66be287SLois Curfman McInnes } 991a66be287SLois Curfman McInnes 992299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 993299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 994299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 995299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 996299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 997299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 998299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 999299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1000299609e3SLois Curfman McInnes 10015615d1e5SSatish Balay #undef __FUNC__ 10025615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIAIJ" 1003416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1004c74985f6SBarry Smith { 1005c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1006c74985f6SBarry Smith 10076d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10086d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10096da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1010b1fbbac0SLois Curfman McInnes op == MAT_COLUMNS_SORTED) { 1011b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1012b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1013b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1014aeafbbfcSLois Curfman McInnes a->roworiented = 1; 1015c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1016c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1017b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 10186da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 10196d4a8577SBarry Smith op == MAT_SYMMETRIC || 10206d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 10216d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 102294a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10236d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 10244b0e389bSBarry Smith a->roworiented = 0; 10254b0e389bSBarry Smith MatSetOption(a->A,op); 10264b0e389bSBarry Smith MatSetOption(a->B,op); 102790f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 102890f02eecSBarry Smith a->donotstash = 1; 102990f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1030e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1031c0bbcb79SLois Curfman McInnes else 1032e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1033c74985f6SBarry Smith return 0; 1034c74985f6SBarry Smith } 1035c74985f6SBarry Smith 10365615d1e5SSatish Balay #undef __FUNC__ 10375615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIAIJ" 1038d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1039c74985f6SBarry Smith { 104044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1041c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1042c74985f6SBarry Smith return 0; 1043c74985f6SBarry Smith } 1044c74985f6SBarry Smith 10455615d1e5SSatish Balay #undef __FUNC__ 10465615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1047d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1048c74985f6SBarry Smith { 104944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1050b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1051c74985f6SBarry Smith return 0; 1052c74985f6SBarry Smith } 1053c74985f6SBarry Smith 10545615d1e5SSatish Balay #undef __FUNC__ 10555615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1056d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1057c74985f6SBarry Smith { 105844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1059c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1060c74985f6SBarry Smith return 0; 1061c74985f6SBarry Smith } 1062c74985f6SBarry Smith 10636d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 10646d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 10656d84be18SBarry Smith 10665615d1e5SSatish Balay #undef __FUNC__ 10675615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 10686d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 106939e00950SLois Curfman McInnes { 1070154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 107170f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1072154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1073154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 107470f0671dSBarry Smith int *cmap, *idx_p; 107539e00950SLois Curfman McInnes 1076e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 10777a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 10787a0afa10SBarry Smith 107970f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 10807a0afa10SBarry Smith /* 10817a0afa10SBarry Smith allocate enough space to hold information from the longest row. 10827a0afa10SBarry Smith */ 10837a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1084c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1085c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 10867a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 10877a0afa10SBarry Smith if (max < tmp) { max = tmp; } 10887a0afa10SBarry Smith } 10897a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 10907a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 10917a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 10927a0afa10SBarry Smith } 10937a0afa10SBarry Smith 1094e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1095abc0e9e4SLois Curfman McInnes lrow = row - rstart; 109639e00950SLois Curfman McInnes 1097154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1098154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1099154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 110038597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 110138597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1102154123eaSLois Curfman McInnes nztot = nzA + nzB; 1103154123eaSLois Curfman McInnes 110470f0671dSBarry Smith cmap = mat->garray; 1105154123eaSLois Curfman McInnes if (v || idx) { 1106154123eaSLois Curfman McInnes if (nztot) { 1107154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 110870f0671dSBarry Smith int imark = -1; 1109154123eaSLois Curfman McInnes if (v) { 111070f0671dSBarry Smith *v = v_p = mat->rowvalues; 111139e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 111270f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1113154123eaSLois Curfman McInnes else break; 1114154123eaSLois Curfman McInnes } 1115154123eaSLois Curfman McInnes imark = i; 111670f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 111770f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1118154123eaSLois Curfman McInnes } 1119154123eaSLois Curfman McInnes if (idx) { 112070f0671dSBarry Smith *idx = idx_p = mat->rowindices; 112170f0671dSBarry Smith if (imark > -1) { 112270f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 112370f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 112470f0671dSBarry Smith } 112570f0671dSBarry Smith } else { 1126154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 112770f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1128154123eaSLois Curfman McInnes else break; 1129154123eaSLois Curfman McInnes } 1130154123eaSLois Curfman McInnes imark = i; 113170f0671dSBarry Smith } 113270f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 113370f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 113439e00950SLois Curfman McInnes } 113539e00950SLois Curfman McInnes } 11361ca473b0SSatish Balay else { 11371ca473b0SSatish Balay if (idx) *idx = 0; 11381ca473b0SSatish Balay if (v) *v = 0; 11391ca473b0SSatish Balay } 1140154123eaSLois Curfman McInnes } 114139e00950SLois Curfman McInnes *nz = nztot; 114238597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 114338597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 114439e00950SLois Curfman McInnes return 0; 114539e00950SLois Curfman McInnes } 114639e00950SLois Curfman McInnes 11475615d1e5SSatish Balay #undef __FUNC__ 11485615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIAIJ" 11496d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 115039e00950SLois Curfman McInnes { 11517a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 11527a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1153e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 11547a0afa10SBarry Smith } 11557a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 115639e00950SLois Curfman McInnes return 0; 115739e00950SLois Curfman McInnes } 115839e00950SLois Curfman McInnes 11595615d1e5SSatish Balay #undef __FUNC__ 11605615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 1161cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1162855ac2c5SLois Curfman McInnes { 1163855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1164ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1165416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1166416022c9SBarry Smith double sum = 0.0; 116704ca555eSLois Curfman McInnes Scalar *v; 116804ca555eSLois Curfman McInnes 116917699dbbSLois Curfman McInnes if (aij->size == 1) { 117014183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 117137fa93a5SLois Curfman McInnes } else { 117204ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 117304ca555eSLois Curfman McInnes v = amat->a; 117404ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 117504ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 117604ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 117704ca555eSLois Curfman McInnes #else 117804ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 117904ca555eSLois Curfman McInnes #endif 118004ca555eSLois Curfman McInnes } 118104ca555eSLois Curfman McInnes v = bmat->a; 118204ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 118304ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 118404ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 118504ca555eSLois Curfman McInnes #else 118604ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 118704ca555eSLois Curfman McInnes #endif 118804ca555eSLois Curfman McInnes } 11896d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 119004ca555eSLois Curfman McInnes *norm = sqrt(*norm); 119104ca555eSLois Curfman McInnes } 119204ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 119304ca555eSLois Curfman McInnes double *tmp, *tmp2; 119404ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 11950452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 11960452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1197cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 119804ca555eSLois Curfman McInnes *norm = 0.0; 119904ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 120004ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1201579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 120204ca555eSLois Curfman McInnes } 120304ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 120404ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1205579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 120604ca555eSLois Curfman McInnes } 12076d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 120804ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 120904ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 121004ca555eSLois Curfman McInnes } 12110452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 121204ca555eSLois Curfman McInnes } 121304ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1214515d9167SLois Curfman McInnes double ntemp = 0.0; 121504ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1216dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 121704ca555eSLois Curfman McInnes sum = 0.0; 121804ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1219cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 122004ca555eSLois Curfman McInnes } 1221dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 122204ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1223cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 122404ca555eSLois Curfman McInnes } 1225515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 122604ca555eSLois Curfman McInnes } 12276d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 122804ca555eSLois Curfman McInnes } 122904ca555eSLois Curfman McInnes else { 1230e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 123104ca555eSLois Curfman McInnes } 123237fa93a5SLois Curfman McInnes } 1233855ac2c5SLois Curfman McInnes return 0; 1234855ac2c5SLois Curfman McInnes } 1235855ac2c5SLois Curfman McInnes 12365615d1e5SSatish Balay #undef __FUNC__ 12375615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 12380de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1239b7c46309SBarry Smith { 1240b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1241dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1242416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1243416022c9SBarry Smith Mat B; 1244b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1245b7c46309SBarry Smith Scalar *array; 1246b7c46309SBarry Smith 12473638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 1248e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 1249b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1250b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1251b7c46309SBarry Smith 1252b7c46309SBarry Smith /* copy over the A part */ 1253ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1254b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1255b7c46309SBarry Smith row = a->rstart; 1256dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1257b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1258416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1259b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1260b7c46309SBarry Smith } 1261b7c46309SBarry Smith aj = Aloc->j; 12624af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1263b7c46309SBarry Smith 1264b7c46309SBarry Smith /* copy over the B part */ 1265ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1266b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1267b7c46309SBarry Smith row = a->rstart; 12680452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1269dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1270b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1271416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1272b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1273b7c46309SBarry Smith } 12740452661fSBarry Smith PetscFree(ct); 12756d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12766d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12773638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 12780de55854SLois Curfman McInnes *matout = B; 12790de55854SLois Curfman McInnes } else { 12800de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12810452661fSBarry Smith PetscFree(a->rowners); 12820de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12830de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12840452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12850452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12860de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1287a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12880452661fSBarry Smith PetscFree(a); 1289416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12900452661fSBarry Smith PetscHeaderDestroy(B); 12910de55854SLois Curfman McInnes } 1292b7c46309SBarry Smith return 0; 1293b7c46309SBarry Smith } 1294b7c46309SBarry Smith 12955615d1e5SSatish Balay #undef __FUNC__ 12965615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 12974b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1298a008b906SSatish Balay { 12994b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13004b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1301a008b906SSatish Balay int ierr,s1,s2,s3; 1302a008b906SSatish Balay 13034b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 13044b967eb1SSatish Balay if (rr) { 13054b967eb1SSatish Balay s3 = aij->n; 13064b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 1307e3372554SBarry Smith if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 13084b967eb1SSatish Balay /* Overlap communication with computation. */ 130943a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1310a008b906SSatish Balay } 13114b967eb1SSatish Balay if (ll) { 13124b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 1313e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1314c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 13154b967eb1SSatish Balay } 13164b967eb1SSatish Balay /* scale the diagonal block */ 1317c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 13184b967eb1SSatish Balay 13194b967eb1SSatish Balay if (rr) { 13204b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 132143a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1322c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 13234b967eb1SSatish Balay } 13244b967eb1SSatish Balay 1325a008b906SSatish Balay return 0; 1326a008b906SSatish Balay } 1327a008b906SSatish Balay 1328a008b906SSatish Balay 1329682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 13305615d1e5SSatish Balay #undef __FUNC__ 13315615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIAIJ" 1332682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1333682d7d0cSBarry Smith { 1334682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1335682d7d0cSBarry Smith 1336682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1337682d7d0cSBarry Smith else return 0; 1338682d7d0cSBarry Smith } 1339682d7d0cSBarry Smith 13405615d1e5SSatish Balay #undef __FUNC__ 13415615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIAIJ" 13425a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 13435a838052SSatish Balay { 13445a838052SSatish Balay *bs = 1; 13455a838052SSatish Balay return 0; 13465a838052SSatish Balay } 13475615d1e5SSatish Balay #undef __FUNC__ 13485615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1349bb5a7306SBarry Smith static int MatSetUnfactored_MPIAIJ(Mat A) 1350bb5a7306SBarry Smith { 1351bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1352bb5a7306SBarry Smith int ierr; 1353bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1354bb5a7306SBarry Smith return 0; 1355bb5a7306SBarry Smith } 1356bb5a7306SBarry Smith 13573d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 13582f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 13590a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 13600a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 13618a729477SBarry Smith /* -------------------------------------------------------------------*/ 13622ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 136339e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 136444a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 136544a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 136636ce4990SBarry Smith 0,0, 136736ce4990SBarry Smith 0,0, 136836ce4990SBarry Smith 0,0, 136944a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1370b7c46309SBarry Smith MatTranspose_MPIAIJ, 1371a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1372a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1373ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 13741eb62cbbSBarry Smith 0, 1375299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 137636ce4990SBarry Smith 0,0,0,0, 1377d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 137836ce4990SBarry Smith 0,0, 137994a9d846SBarry Smith 0,0,MatConvertSameType_MPIAIJ,0,0, 1380b49de8d1SLois Curfman McInnes 0,0,0, 1381598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1382052efed2SBarry Smith MatPrintHelp_MPIAIJ, 13833b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 13840a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 1385bb5a7306SBarry Smith MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 138636ce4990SBarry Smith 13878a729477SBarry Smith 13885615d1e5SSatish Balay #undef __FUNC__ 13895615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 13901987afe7SBarry Smith /*@C 1391ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 13923a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 13933a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 13943a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 13953a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 13968a729477SBarry Smith 13978a729477SBarry Smith Input Parameters: 13981eb62cbbSBarry Smith . comm - MPI communicator 13997d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 140092e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 140192e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 140292e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 140392e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 140492e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 14057d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 140692e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1407ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1408ff756334SLois Curfman McInnes (same for all local rows) 14092bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 141092e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 14112bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 14122bd5e0b2SLois Curfman McInnes it is zero. 14132bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1414ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 14152bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 14162bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 14172bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 14188a729477SBarry Smith 1419ff756334SLois Curfman McInnes Output Parameter: 142044cd7ae7SLois Curfman McInnes . A - the matrix 14218a729477SBarry Smith 1422ff756334SLois Curfman McInnes Notes: 1423ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1424ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 14250002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 14260002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1427ff756334SLois Curfman McInnes 1428ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1429ff756334SLois Curfman McInnes (possibly both). 1430ff756334SLois Curfman McInnes 14315511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 14325511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 14335511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 14345511cfe3SLois Curfman McInnes 14355511cfe3SLois Curfman McInnes Options Database Keys: 14365511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 14376e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 14386e62573dSLois Curfman McInnes $ (max limit=5) 14396e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 14406e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 14416e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 14425511cfe3SLois Curfman McInnes 1443e0245417SLois Curfman McInnes Storage Information: 1444e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1445e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1446e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1447e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1448e0245417SLois Curfman McInnes 1449e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 14505ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 14515ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 14525ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 14535ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1454ff756334SLois Curfman McInnes 14555511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 14565511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 14572191d07cSBarry Smith 1458b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1459b810aeb4SBarry Smith $ ------------------- 14605511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 14615511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 14625511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 14635511cfe3SLois Curfman McInnes $ ------------------- 1464b810aeb4SBarry Smith $ 1465b810aeb4SBarry Smith 14665511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 14675511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 14685511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 14695511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 14705511cfe3SLois Curfman McInnes 14715511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 14725511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 14735511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 14743d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 147592e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 14766da5968aSLois Curfman McInnes matrices. 14773a511b96SLois Curfman McInnes 1478dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1479ff756334SLois Curfman McInnes 1480fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 14818a729477SBarry Smith @*/ 14821eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 148344cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 14848a729477SBarry Smith { 148544cd7ae7SLois Curfman McInnes Mat B; 148644cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 148736ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1488416022c9SBarry Smith 148944cd7ae7SLois Curfman McInnes *A = 0; 149044cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 149144cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 149244cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 149344cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 149444cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 149536ce4990SBarry Smith MPI_Comm_size(comm,&size); 149636ce4990SBarry Smith if (size == 1) { 149736ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 149836ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 149936ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 150036ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 150136ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 150236ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 150336ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 150436ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 150536ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 150636ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 150736ce4990SBarry Smith } 150844cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 150944cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 151044cd7ae7SLois Curfman McInnes B->factor = 0; 151144cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 151290f02eecSBarry Smith B->mapping = 0; 1513d6dfbf8fSBarry Smith 151444cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 151544cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 151644cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 15171eb62cbbSBarry Smith 1518b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1519e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 15201987afe7SBarry Smith 1521dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 15221eb62cbbSBarry Smith work[0] = m; work[1] = n; 1523d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1524dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1525dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 15261eb62cbbSBarry Smith } 152744cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 152844cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 152944cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 153044cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 153144cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 153244cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 15331eb62cbbSBarry Smith 15341eb62cbbSBarry Smith /* build local table of row and column ownerships */ 153544cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 153644cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1537603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 153844cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 153944cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 154044cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 154144cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 15428a729477SBarry Smith } 154344cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 154444cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 154544cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 154644cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 154744cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 154844cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 15498a729477SBarry Smith } 155044cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 155144cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 15528a729477SBarry Smith 15535ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 155444cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 155544cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 15567b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 155744cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 155844cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 15598a729477SBarry Smith 15601eb62cbbSBarry Smith /* build cache for off array entries formed */ 156144cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 156290f02eecSBarry Smith b->donotstash = 0; 156344cd7ae7SLois Curfman McInnes b->colmap = 0; 156444cd7ae7SLois Curfman McInnes b->garray = 0; 156544cd7ae7SLois Curfman McInnes b->roworiented = 1; 15668a729477SBarry Smith 15671eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 156844cd7ae7SLois Curfman McInnes b->lvec = 0; 156944cd7ae7SLois Curfman McInnes b->Mvctx = 0; 15708a729477SBarry Smith 15717a0afa10SBarry Smith /* stuff for MatGetRow() */ 157244cd7ae7SLois Curfman McInnes b->rowindices = 0; 157344cd7ae7SLois Curfman McInnes b->rowvalues = 0; 157444cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 15757a0afa10SBarry Smith 157644cd7ae7SLois Curfman McInnes *A = B; 1577d6dfbf8fSBarry Smith return 0; 1578d6dfbf8fSBarry Smith } 1579c74985f6SBarry Smith 15805615d1e5SSatish Balay #undef __FUNC__ 15815615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ" 15823d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1583d6dfbf8fSBarry Smith { 1584d6dfbf8fSBarry Smith Mat mat; 1585416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1586a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1587d6dfbf8fSBarry Smith 1588416022c9SBarry Smith *newmat = 0; 15890452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1590a5a9c739SBarry Smith PLogObjectCreate(mat); 15910452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1592416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 159344a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 159444a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1595d6dfbf8fSBarry Smith mat->factor = matin->factor; 1596c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1597d6dfbf8fSBarry Smith 159844cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 159944cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 160044cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 160144cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1602d6dfbf8fSBarry Smith 1603416022c9SBarry Smith a->rstart = oldmat->rstart; 1604416022c9SBarry Smith a->rend = oldmat->rend; 1605416022c9SBarry Smith a->cstart = oldmat->cstart; 1606416022c9SBarry Smith a->cend = oldmat->cend; 160717699dbbSLois Curfman McInnes a->size = oldmat->size; 160817699dbbSLois Curfman McInnes a->rank = oldmat->rank; 160964119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1610bcd2baecSBarry Smith a->rowvalues = 0; 1611bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1612d6dfbf8fSBarry Smith 1613603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1614603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1615603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1616603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1617416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 16182ee70a88SLois Curfman McInnes if (oldmat->colmap) { 16190452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1620416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1621416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1622416022c9SBarry Smith } else a->colmap = 0; 16233f41c07dSBarry Smith if (oldmat->garray) { 16243f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 16253f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1626464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 16273f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1628416022c9SBarry Smith } else a->garray = 0; 1629d6dfbf8fSBarry Smith 1630416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1631416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1632a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1633416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1634416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1635416022c9SBarry Smith PLogObjectParent(mat,a->A); 1636416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1637416022c9SBarry Smith PLogObjectParent(mat,a->B); 16385dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 163925cdf11fSBarry Smith if (flg) { 1640682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1641682d7d0cSBarry Smith } 16428a729477SBarry Smith *newmat = mat; 16438a729477SBarry Smith return 0; 16448a729477SBarry Smith } 1645416022c9SBarry Smith 164677c4ece6SBarry Smith #include "sys.h" 1647416022c9SBarry Smith 16485615d1e5SSatish Balay #undef __FUNC__ 16495615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 165019bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1651416022c9SBarry Smith { 1652d65a2f8fSBarry Smith Mat A; 1653416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1654d65a2f8fSBarry Smith Scalar *vals,*svals; 165519bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1656416022c9SBarry Smith MPI_Status status; 165717699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1658d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 165919bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1660416022c9SBarry Smith 166117699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 166217699dbbSLois Curfman McInnes if (!rank) { 166390ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 166477c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1665e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1666416022c9SBarry Smith } 1667416022c9SBarry Smith 1668416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1669416022c9SBarry Smith M = header[1]; N = header[2]; 1670416022c9SBarry Smith /* determine ownership of all rows */ 167117699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 16720452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1673416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1674416022c9SBarry Smith rowners[0] = 0; 167517699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1676416022c9SBarry Smith rowners[i] += rowners[i-1]; 1677416022c9SBarry Smith } 167817699dbbSLois Curfman McInnes rstart = rowners[rank]; 167917699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1680416022c9SBarry Smith 1681416022c9SBarry Smith /* distribute row lengths to all processors */ 16820452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1683416022c9SBarry Smith offlens = ourlens + (rend-rstart); 168417699dbbSLois Curfman McInnes if (!rank) { 16850452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 168677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 16870452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 168817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1689d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 16900452661fSBarry Smith PetscFree(sndcounts); 1691416022c9SBarry Smith } 1692416022c9SBarry Smith else { 1693416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1694416022c9SBarry Smith } 1695416022c9SBarry Smith 169617699dbbSLois Curfman McInnes if (!rank) { 1697416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 16980452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1699cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 170017699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1701416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1702416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1703416022c9SBarry Smith } 1704416022c9SBarry Smith } 17050452661fSBarry Smith PetscFree(rowlengths); 1706416022c9SBarry Smith 1707416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1708416022c9SBarry Smith maxnz = 0; 170917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 17100452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1711416022c9SBarry Smith } 17120452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1713416022c9SBarry Smith 1714416022c9SBarry Smith /* read in my part of the matrix column indices */ 1715416022c9SBarry Smith nz = procsnz[0]; 17160452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 171777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1718d65a2f8fSBarry Smith 1719d65a2f8fSBarry Smith /* read in every one elses and ship off */ 172017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1721d65a2f8fSBarry Smith nz = procsnz[i]; 172277c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1723d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1724d65a2f8fSBarry Smith } 17250452661fSBarry Smith PetscFree(cols); 1726416022c9SBarry Smith } 1727416022c9SBarry Smith else { 1728416022c9SBarry Smith /* determine buffer space needed for message */ 1729416022c9SBarry Smith nz = 0; 1730416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1731416022c9SBarry Smith nz += ourlens[i]; 1732416022c9SBarry Smith } 17330452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1734416022c9SBarry Smith 1735416022c9SBarry Smith /* receive message of column indices*/ 1736d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1737416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1738e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1739416022c9SBarry Smith } 1740416022c9SBarry Smith 1741416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1742cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1743416022c9SBarry Smith jj = 0; 1744416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1745416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1746d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1747416022c9SBarry Smith jj++; 1748416022c9SBarry Smith } 1749416022c9SBarry Smith } 1750d65a2f8fSBarry Smith 1751d65a2f8fSBarry Smith /* create our matrix */ 1752416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1753416022c9SBarry Smith ourlens[i] -= offlens[i]; 1754416022c9SBarry Smith } 1755d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1756d65a2f8fSBarry Smith A = *newmat; 17576d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1758d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1759d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1760d65a2f8fSBarry Smith } 1761416022c9SBarry Smith 176217699dbbSLois Curfman McInnes if (!rank) { 17630452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1764416022c9SBarry Smith 1765416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1766416022c9SBarry Smith nz = procsnz[0]; 176777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1768d65a2f8fSBarry Smith 1769d65a2f8fSBarry Smith /* insert into matrix */ 1770d65a2f8fSBarry Smith jj = rstart; 1771d65a2f8fSBarry Smith smycols = mycols; 1772d65a2f8fSBarry Smith svals = vals; 1773d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1774d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1775d65a2f8fSBarry Smith smycols += ourlens[i]; 1776d65a2f8fSBarry Smith svals += ourlens[i]; 1777d65a2f8fSBarry Smith jj++; 1778416022c9SBarry Smith } 1779416022c9SBarry Smith 1780d65a2f8fSBarry Smith /* read in other processors and ship out */ 178117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1782416022c9SBarry Smith nz = procsnz[i]; 178377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1784d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1785416022c9SBarry Smith } 17860452661fSBarry Smith PetscFree(procsnz); 1787416022c9SBarry Smith } 1788d65a2f8fSBarry Smith else { 1789d65a2f8fSBarry Smith /* receive numeric values */ 17900452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1791416022c9SBarry Smith 1792d65a2f8fSBarry Smith /* receive message of values*/ 1793d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1794d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1795e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1796d65a2f8fSBarry Smith 1797d65a2f8fSBarry Smith /* insert into matrix */ 1798d65a2f8fSBarry Smith jj = rstart; 1799d65a2f8fSBarry Smith smycols = mycols; 1800d65a2f8fSBarry Smith svals = vals; 1801d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1802d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1803d65a2f8fSBarry Smith smycols += ourlens[i]; 1804d65a2f8fSBarry Smith svals += ourlens[i]; 1805d65a2f8fSBarry Smith jj++; 1806d65a2f8fSBarry Smith } 1807d65a2f8fSBarry Smith } 18080452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1809d65a2f8fSBarry Smith 18106d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 18116d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1812416022c9SBarry Smith return 0; 1813416022c9SBarry Smith } 1814