1b57c8cebSBarry Smith 2a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 3*6eb55b6aSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.240 1998/04/27 03:54:07 curfman Exp bsmith $"; 4cb512458SBarry Smith #endif 58a729477SBarry Smith 63369ce9aSBarry Smith #include "pinclude/pviewer.h" 770f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 8f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 9d9942c19SSatish Balay #include "src/inline/spops.h" 108a729477SBarry Smith 119e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 129e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 139e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 149e25ed09SBarry Smith length of colmap equals the global matrix length. 159e25ed09SBarry Smith */ 165615d1e5SSatish Balay #undef __FUNC__ 17d4bb536fSBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" 180a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 199e25ed09SBarry Smith { 2044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 21ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 22905e6a2fSBarry Smith int n = B->n,i; 23dbb450caSBarry Smith 243a40ed3dSBarry Smith PetscFunctionBegin; 25758f045eSSatish Balay aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 26464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 27cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 28905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 293a40ed3dSBarry Smith PetscFunctionReturn(0); 309e25ed09SBarry Smith } 319e25ed09SBarry Smith 322493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 332493cbb0SBarry Smith 340520107fSSatish Balay #define CHUNKSIZE 15 3530770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 360520107fSSatish Balay { \ 370520107fSSatish Balay \ 380520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 3930770e4dSSatish Balay rmax = aimax[row]; nrow = ailen[row]; \ 40f5e9677aSSatish Balay col1 = col - shift; \ 41f5e9677aSSatish Balay \ 42ba4e3ef2SSatish Balay low = 0; high = nrow; \ 43ba4e3ef2SSatish Balay while (high-low > 5) { \ 44ba4e3ef2SSatish Balay t = (low+high)/2; \ 45ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 46ba4e3ef2SSatish Balay else low = t; \ 47ba4e3ef2SSatish Balay } \ 480520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 49f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 50f5e9677aSSatish Balay if (rp[_i] == col1) { \ 510520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 520520107fSSatish Balay else ap[_i] = value; \ 5330770e4dSSatish Balay goto a_noinsert; \ 540520107fSSatish Balay } \ 550520107fSSatish Balay } \ 5689280ab3SLois Curfman McInnes if (nonew == 1) goto a_noinsert; \ 57a8c6a408SBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 580520107fSSatish Balay if (nrow >= rmax) { \ 590520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 600520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 610520107fSSatish Balay Scalar *new_a; \ 620520107fSSatish Balay \ 63a8c6a408SBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 6489280ab3SLois Curfman McInnes \ 650520107fSSatish Balay /* malloc new storage space */ \ 660520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 670520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 680520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 690520107fSSatish Balay new_i = new_j + new_nz; \ 700520107fSSatish Balay \ 710520107fSSatish Balay /* copy over old data into new slots */ \ 720520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 730520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 740520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 750520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 760520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 770520107fSSatish Balay len*sizeof(int)); \ 780520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 790520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 800520107fSSatish Balay len*sizeof(Scalar)); \ 810520107fSSatish Balay /* free up old matrix storage */ \ 82f5e9677aSSatish Balay \ 830520107fSSatish Balay PetscFree(a->a); \ 840520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 850520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 860520107fSSatish Balay a->singlemalloc = 1; \ 870520107fSSatish Balay \ 880520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 8930770e4dSSatish Balay rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 900520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 910520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 920520107fSSatish Balay a->reallocs++; \ 930520107fSSatish Balay } \ 940520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 950520107fSSatish Balay /* shift up all the later entries in this row */ \ 960520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 970520107fSSatish Balay rp[ii+1] = rp[ii]; \ 980520107fSSatish Balay ap[ii+1] = ap[ii]; \ 990520107fSSatish Balay } \ 100f5e9677aSSatish Balay rp[_i] = col1; \ 1010520107fSSatish Balay ap[_i] = value; \ 10230770e4dSSatish Balay a_noinsert: ; \ 1030520107fSSatish Balay ailen[row] = nrow; \ 1040520107fSSatish Balay } 1050a198c4cSBarry Smith 10630770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 10730770e4dSSatish Balay { \ 10830770e4dSSatish Balay \ 10930770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 11030770e4dSSatish Balay rmax = bimax[row]; nrow = bilen[row]; \ 11130770e4dSSatish Balay col1 = col - shift; \ 11230770e4dSSatish Balay \ 113ba4e3ef2SSatish Balay low = 0; high = nrow; \ 114ba4e3ef2SSatish Balay while (high-low > 5) { \ 115ba4e3ef2SSatish Balay t = (low+high)/2; \ 116ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 117ba4e3ef2SSatish Balay else low = t; \ 118ba4e3ef2SSatish Balay } \ 11930770e4dSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 12030770e4dSSatish Balay if (rp[_i] > col1) break; \ 12130770e4dSSatish Balay if (rp[_i] == col1) { \ 12230770e4dSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 12330770e4dSSatish Balay else ap[_i] = value; \ 12430770e4dSSatish Balay goto b_noinsert; \ 12530770e4dSSatish Balay } \ 12630770e4dSSatish Balay } \ 12789280ab3SLois Curfman McInnes if (nonew == 1) goto b_noinsert; \ 128a8c6a408SBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 12930770e4dSSatish Balay if (nrow >= rmax) { \ 13030770e4dSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 13174c639caSSatish Balay int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 13230770e4dSSatish Balay Scalar *new_a; \ 13330770e4dSSatish Balay \ 134a8c6a408SBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 13589280ab3SLois Curfman McInnes \ 13630770e4dSSatish Balay /* malloc new storage space */ \ 13774c639caSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 13830770e4dSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 13930770e4dSSatish Balay new_j = (int *) (new_a + new_nz); \ 14030770e4dSSatish Balay new_i = new_j + new_nz; \ 14130770e4dSSatish Balay \ 14230770e4dSSatish Balay /* copy over old data into new slots */ \ 14330770e4dSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 14474c639caSSatish Balay for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 14530770e4dSSatish Balay PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 14630770e4dSSatish Balay len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 14730770e4dSSatish Balay PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 14830770e4dSSatish Balay len*sizeof(int)); \ 14930770e4dSSatish Balay PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 15030770e4dSSatish Balay PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 15130770e4dSSatish Balay len*sizeof(Scalar)); \ 15230770e4dSSatish Balay /* free up old matrix storage */ \ 15330770e4dSSatish Balay \ 15474c639caSSatish Balay PetscFree(b->a); \ 15574c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 15674c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 15774c639caSSatish Balay b->singlemalloc = 1; \ 15830770e4dSSatish Balay \ 15930770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 16030770e4dSSatish Balay rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 16174c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 16274c639caSSatish Balay b->maxnz += CHUNKSIZE; \ 16374c639caSSatish Balay b->reallocs++; \ 16430770e4dSSatish Balay } \ 16574c639caSSatish Balay N = nrow++ - 1; b->nz++; \ 16630770e4dSSatish Balay /* shift up all the later entries in this row */ \ 16730770e4dSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 16830770e4dSSatish Balay rp[ii+1] = rp[ii]; \ 16930770e4dSSatish Balay ap[ii+1] = ap[ii]; \ 17030770e4dSSatish Balay } \ 17130770e4dSSatish Balay rp[_i] = col1; \ 17230770e4dSSatish Balay ap[_i] = value; \ 17330770e4dSSatish Balay b_noinsert: ; \ 17430770e4dSSatish Balay bilen[row] = nrow; \ 17530770e4dSSatish Balay } 17630770e4dSSatish Balay 1770520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1785615d1e5SSatish Balay #undef __FUNC__ 1795615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 1808f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 1818a729477SBarry Smith { 18244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1834b0e389bSBarry Smith Scalar value; 1841eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 1851eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 186905e6a2fSBarry Smith int roworiented = aij->roworiented; 1878a729477SBarry Smith 1880520107fSSatish Balay /* Some Variables required in the macro */ 1894ee7247eSSatish Balay Mat A = aij->A; 1904ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 19130770e4dSSatish Balay int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 19230770e4dSSatish Balay Scalar *aa = a->a; 19330770e4dSSatish Balay 19430770e4dSSatish Balay Mat B = aij->B; 19530770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 19630770e4dSSatish Balay int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 19730770e4dSSatish Balay Scalar *ba = b->a; 19830770e4dSSatish Balay 199ba4e3ef2SSatish Balay int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 20030770e4dSSatish Balay int nonew = a->nonew,shift = a->indexshift; 20130770e4dSSatish Balay Scalar *ap; 2024ee7247eSSatish Balay 2033a40ed3dSBarry Smith PetscFunctionBegin; 2048a729477SBarry Smith for ( i=0; i<m; i++ ) { 2053a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 206a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207a8c6a408SBarry Smith if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 2080a198c4cSBarry Smith #endif 2094b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 2104b0e389bSBarry Smith row = im[i] - rstart; 2111eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 2124b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 2134b0e389bSBarry Smith col = in[j] - cstart; 2144b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 21530770e4dSSatish Balay MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 2160520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2171eb62cbbSBarry Smith } 2183a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 219a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220a8c6a408SBarry Smith else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 2210a198c4cSBarry Smith #endif 2221eb62cbbSBarry Smith else { 223227d817aSBarry Smith if (mat->was_assembled) { 224905e6a2fSBarry Smith if (!aij->colmap) { 225905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226905e6a2fSBarry Smith } 227905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 228ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2292493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2304b0e389bSBarry Smith col = in[j]; 2319bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232f9508a3cSSatish Balay B = aij->B; 233f9508a3cSSatish Balay b = (Mat_SeqAIJ *) B->data; 234f9508a3cSSatish Balay bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235f9508a3cSSatish Balay ba = b->a; 236d6dfbf8fSBarry Smith } 237c48de900SBarry Smith } else col = in[j]; 2384b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 23930770e4dSSatish Balay MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 24030770e4dSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2411eb62cbbSBarry Smith } 2421eb62cbbSBarry Smith } 2431eb62cbbSBarry Smith } 2441eb62cbbSBarry Smith else { 24590f02eecSBarry Smith if (roworiented && !aij->donotstash) { 2464b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 2474b0e389bSBarry Smith } 2484b0e389bSBarry Smith else { 24990f02eecSBarry Smith if (!aij->donotstash) { 2504b0e389bSBarry Smith row = im[i]; 2514b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 2524b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 2534b0e389bSBarry Smith } 2544b0e389bSBarry Smith } 2551eb62cbbSBarry Smith } 2568a729477SBarry Smith } 25790f02eecSBarry Smith } 2583a40ed3dSBarry Smith PetscFunctionReturn(0); 2598a729477SBarry Smith } 2608a729477SBarry Smith 2615615d1e5SSatish Balay #undef __FUNC__ 2625615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 2638f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264b49de8d1SLois Curfman McInnes { 265b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 268b49de8d1SLois Curfman McInnes 2693a40ed3dSBarry Smith PetscFunctionBegin; 270b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 271a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 272a8c6a408SBarry Smith if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 273b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 274b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 275b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 276a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 277a8c6a408SBarry Smith if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 278b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 279b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 280b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281b49de8d1SLois Curfman McInnes } 282b49de8d1SLois Curfman McInnes else { 283905e6a2fSBarry Smith if (!aij->colmap) { 284905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285905e6a2fSBarry Smith } 286905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 287e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288d9d09a02SSatish Balay else { 289b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290b49de8d1SLois Curfman McInnes } 291b49de8d1SLois Curfman McInnes } 292b49de8d1SLois Curfman McInnes } 293a8c6a408SBarry Smith } else { 294a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 295b49de8d1SLois Curfman McInnes } 296b49de8d1SLois Curfman McInnes } 2973a40ed3dSBarry Smith PetscFunctionReturn(0); 298b49de8d1SLois Curfman McInnes } 299b49de8d1SLois Curfman McInnes 3005615d1e5SSatish Balay #undef __FUNC__ 3015615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 3028f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 3038a729477SBarry Smith { 30444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 305d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 30617699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 30717699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 3081eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3096abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 3101eb62cbbSBarry Smith InsertMode addv; 3111eb62cbbSBarry Smith Scalar *rvalues,*svalues; 3121eb62cbbSBarry Smith 3133a40ed3dSBarry Smith PetscFunctionBegin; 3141eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 315ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 316dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 317a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 3181eb62cbbSBarry Smith } 31947794344SBarry Smith mat->insertmode = addv; /* in case this processor had no cache */ 3201eb62cbbSBarry Smith 3211eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3220452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3240452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 3251eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3261eb62cbbSBarry Smith idx = aij->stash.idx[i]; 32717699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3281eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3291eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 3308a729477SBarry Smith } 3318a729477SBarry Smith } 3328a729477SBarry Smith } 33317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3341eb62cbbSBarry Smith 3351eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3360452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 337ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 33817699dbbSLois Curfman McInnes nreceives = work[rank]; 339ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 34017699dbbSLois Curfman McInnes nmax = work[rank]; 3410452661fSBarry Smith PetscFree(work); 3421eb62cbbSBarry Smith 3431eb62cbbSBarry Smith /* post receives: 3441eb62cbbSBarry Smith 1) each message will consist of ordered pairs 3451eb62cbbSBarry Smith (global index,value) we store the global index as a double 346d6dfbf8fSBarry Smith to simplify the message passing. 3471eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 3481eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 3491eb62cbbSBarry Smith this is a lot of wasted space. 3501eb62cbbSBarry Smith 3511eb62cbbSBarry Smith 3521eb62cbbSBarry Smith This could be done better. 3531eb62cbbSBarry Smith */ 354ca161407SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 355ca161407SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 3561eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 357ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 358ca161407SBarry Smith comm,recv_waits+i);CHKERRQ(ierr); 3591eb62cbbSBarry Smith } 3601eb62cbbSBarry Smith 3611eb62cbbSBarry Smith /* do sends: 3621eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3631eb62cbbSBarry Smith the ith processor 3641eb62cbbSBarry Smith */ 3650452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 366ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 3670452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3681eb62cbbSBarry Smith starts[0] = 0; 36917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3701eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3711eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 3721eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 3731eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 3741eb62cbbSBarry Smith } 3750452661fSBarry Smith PetscFree(owner); 3761eb62cbbSBarry Smith starts[0] = 0; 37717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3781eb62cbbSBarry Smith count = 0; 37917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3801eb62cbbSBarry Smith if (procs[i]) { 381ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 3821eb62cbbSBarry Smith } 3831eb62cbbSBarry Smith } 3840452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 3851eb62cbbSBarry Smith 3861eb62cbbSBarry Smith /* Free cache space */ 38755908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 38878b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 3891eb62cbbSBarry Smith 3901eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 3911eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 3921eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 3931eb62cbbSBarry Smith aij->rmax = nmax; 3941eb62cbbSBarry Smith 3953a40ed3dSBarry Smith PetscFunctionReturn(0); 3961eb62cbbSBarry Smith } 39744a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 3981eb62cbbSBarry Smith 3995615d1e5SSatish Balay #undef __FUNC__ 4005615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 4018f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 4021eb62cbbSBarry Smith { 40344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4041eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 405416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 406905e6a2fSBarry Smith int row,col,other_disassembled; 4071eb62cbbSBarry Smith Scalar *values,val; 40847794344SBarry Smith InsertMode addv = mat->insertmode; 4091eb62cbbSBarry Smith 4103a40ed3dSBarry Smith PetscFunctionBegin; 4111eb62cbbSBarry Smith /* wait on receives */ 4121eb62cbbSBarry Smith while (count) { 413ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 4141eb62cbbSBarry Smith /* unpack receives into our local space */ 415d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 416ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 4171eb62cbbSBarry Smith n = n/3; 4181eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 419227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 420227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 4211eb62cbbSBarry Smith val = values[3*i+2]; 4221eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 4231eb62cbbSBarry Smith col -= aij->cstart; 4246fd7127cSSatish Balay ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4253a40ed3dSBarry Smith } else { 42655a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 427905e6a2fSBarry Smith if (!aij->colmap) { 428905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 429905e6a2fSBarry Smith } 430905e6a2fSBarry Smith col = aij->colmap[col] - 1; 431ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4322493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 433227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 434d6dfbf8fSBarry Smith } 4359e25ed09SBarry Smith } 4366fd7127cSSatish Balay ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4371eb62cbbSBarry Smith } 4381eb62cbbSBarry Smith } 4391eb62cbbSBarry Smith count--; 4401eb62cbbSBarry Smith } 4410452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 4421eb62cbbSBarry Smith 4431eb62cbbSBarry Smith /* wait on sends */ 4441eb62cbbSBarry Smith if (aij->nsends) { 4450a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 446ca161407SBarry Smith ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 4470452661fSBarry Smith PetscFree(send_status); 4481eb62cbbSBarry Smith } 4490452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 4501eb62cbbSBarry Smith 45178b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 45278b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 4531eb62cbbSBarry Smith 4542493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 4552493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 456ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 457227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 4582493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 4592493cbb0SBarry Smith } 4602493cbb0SBarry Smith 4616d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 46278b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 4635e42470aSBarry Smith } 46478b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 46578b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 4665e42470aSBarry Smith 4677a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 4683a40ed3dSBarry Smith PetscFunctionReturn(0); 4698a729477SBarry Smith } 4708a729477SBarry Smith 4715615d1e5SSatish Balay #undef __FUNC__ 4725615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 4738f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A) 4741eb62cbbSBarry Smith { 47544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 476dbd7a890SLois Curfman McInnes int ierr; 4773a40ed3dSBarry Smith 4783a40ed3dSBarry Smith PetscFunctionBegin; 47978b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 48078b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 4813a40ed3dSBarry Smith PetscFunctionReturn(0); 4821eb62cbbSBarry Smith } 4831eb62cbbSBarry Smith 4845615d1e5SSatish Balay #undef __FUNC__ 4855615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 4868f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 4871eb62cbbSBarry Smith { 48844a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 48917699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 4906a5c57faSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 49117699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 4925392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 4936a5c57faSSatish Balay int *lens,imdex,*lrows,*values,rstart=l->rstart; 494d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 4951eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 4961eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 4971eb62cbbSBarry Smith IS istmp; 498*6eb55b6aSBarry Smith PetscTruth localdiag; 4991eb62cbbSBarry Smith 5003a40ed3dSBarry Smith PetscFunctionBegin; 50177c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 50278b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 5031eb62cbbSBarry Smith 5041eb62cbbSBarry Smith /* first count number of contributors to each processor */ 5050452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 506cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 5070452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 5081eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5091eb62cbbSBarry Smith idx = rows[i]; 5101eb62cbbSBarry Smith found = 0; 51117699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 5121eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 5131eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 5141eb62cbbSBarry Smith } 5151eb62cbbSBarry Smith } 516a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 5171eb62cbbSBarry Smith } 51817699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 5191eb62cbbSBarry Smith 5201eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 5210452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 522ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 52317699dbbSLois Curfman McInnes nrecvs = work[rank]; 524ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 52517699dbbSLois Curfman McInnes nmax = work[rank]; 5260452661fSBarry Smith PetscFree(work); 5271eb62cbbSBarry Smith 5281eb62cbbSBarry Smith /* post receives: */ 5293a40ed3dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 530ca161407SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 5311eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 532ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 5331eb62cbbSBarry Smith } 5341eb62cbbSBarry Smith 5351eb62cbbSBarry Smith /* do sends: 5361eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 5371eb62cbbSBarry Smith the ith processor 5381eb62cbbSBarry Smith */ 5390452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 5403a40ed3dSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 5410452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 5421eb62cbbSBarry Smith starts[0] = 0; 54317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5441eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5451eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 5461eb62cbbSBarry Smith } 5471eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 5481eb62cbbSBarry Smith 5491eb62cbbSBarry Smith starts[0] = 0; 55017699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5511eb62cbbSBarry Smith count = 0; 55217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 5531eb62cbbSBarry Smith if (procs[i]) { 554ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 5551eb62cbbSBarry Smith } 5561eb62cbbSBarry Smith } 5570452661fSBarry Smith PetscFree(starts); 5581eb62cbbSBarry Smith 55917699dbbSLois Curfman McInnes base = owners[rank]; 5601eb62cbbSBarry Smith 5611eb62cbbSBarry Smith /* wait on receives */ 5620452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 5631eb62cbbSBarry Smith source = lens + nrecvs; 5641eb62cbbSBarry Smith count = nrecvs; slen = 0; 5651eb62cbbSBarry Smith while (count) { 566ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 5671eb62cbbSBarry Smith /* unpack receives into our local space */ 568ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 569d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 570d6dfbf8fSBarry Smith lens[imdex] = n; 5711eb62cbbSBarry Smith slen += n; 5721eb62cbbSBarry Smith count--; 5731eb62cbbSBarry Smith } 5740452661fSBarry Smith PetscFree(recv_waits); 5751eb62cbbSBarry Smith 5761eb62cbbSBarry Smith /* move the data into the send scatter */ 5770452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 5781eb62cbbSBarry Smith count = 0; 5791eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 5801eb62cbbSBarry Smith values = rvalues + i*nmax; 5811eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 5821eb62cbbSBarry Smith lrows[count++] = values[j] - base; 5831eb62cbbSBarry Smith } 5841eb62cbbSBarry Smith } 5850452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 5860452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 5871eb62cbbSBarry Smith 5881eb62cbbSBarry Smith /* actually zap the local rows */ 589029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 590464493b3SBarry Smith PLogObjectParent(A,istmp); 5916a5c57faSSatish Balay 592*6eb55b6aSBarry Smith /* 593*6eb55b6aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 594*6eb55b6aSBarry Smith is square and the user wishes to set the diagonal we use seperate 595*6eb55b6aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 596*6eb55b6aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 597*6eb55b6aSBarry Smith 598*6eb55b6aSBarry Smith Contributed by: Mathew Knepley 599*6eb55b6aSBarry Smith */ 600*6eb55b6aSBarry Smith localdiag = PETSC_FALSE; 601*6eb55b6aSBarry Smith if (diag && (l->A->M == l->A->N)) { 602*6eb55b6aSBarry Smith localdiag = PETSC_TRUE; 603*6eb55b6aSBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 604*6eb55b6aSBarry Smith } else { 6056a5c57faSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 606*6eb55b6aSBarry Smith } 60778b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 60878b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 6091eb62cbbSBarry Smith 610*6eb55b6aSBarry Smith if (diag && (localdiag == PETSC_FALSE)) { 611*6eb55b6aSBarry Smith for ( i = 0; i < slen; i++ ) { 612*6eb55b6aSBarry Smith row = lrows[i] + rstart; 613*6eb55b6aSBarry Smith MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 614*6eb55b6aSBarry Smith } 615*6eb55b6aSBarry Smith MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 616*6eb55b6aSBarry Smith MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 617*6eb55b6aSBarry Smith } 618*6eb55b6aSBarry Smith 619*6eb55b6aSBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 620*6eb55b6aSBarry Smith 6216a5c57faSSatish Balay if (diag) { 6226a5c57faSSatish Balay for ( i = 0; i < slen; i++ ) { 6236a5c57faSSatish Balay row = lrows[i] + rstart; 6246a5c57faSSatish Balay MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 6256a5c57faSSatish Balay } 6266a5c57faSSatish Balay MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 6276a5c57faSSatish Balay MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 6286a5c57faSSatish Balay } 6296a5c57faSSatish Balay PetscFree(lrows); 6301eb62cbbSBarry Smith /* wait on sends */ 6311eb62cbbSBarry Smith if (nsends) { 632ca161407SBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 633ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 6340452661fSBarry Smith PetscFree(send_status); 6351eb62cbbSBarry Smith } 6360452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 6371eb62cbbSBarry Smith 6383a40ed3dSBarry Smith PetscFunctionReturn(0); 6391eb62cbbSBarry Smith } 6401eb62cbbSBarry Smith 6415615d1e5SSatish Balay #undef __FUNC__ 6425615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 6438f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 6441eb62cbbSBarry Smith { 645416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 646fbd6ef76SBarry Smith int ierr,nt; 647416022c9SBarry Smith 6483a40ed3dSBarry Smith PetscFunctionBegin; 649a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 650fbd6ef76SBarry Smith if (nt != a->n) { 651a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 652fbd6ef76SBarry Smith } 65343a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 654f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 65543a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 656f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 6573a40ed3dSBarry Smith PetscFunctionReturn(0); 6581eb62cbbSBarry Smith } 6591eb62cbbSBarry Smith 6605615d1e5SSatish Balay #undef __FUNC__ 6615615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 6628f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 663da3a660dSBarry Smith { 664416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 665da3a660dSBarry Smith int ierr; 6663a40ed3dSBarry Smith 6673a40ed3dSBarry Smith PetscFunctionBegin; 66843a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 669f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 67043a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 671f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 6723a40ed3dSBarry Smith PetscFunctionReturn(0); 673da3a660dSBarry Smith } 674da3a660dSBarry Smith 6755615d1e5SSatish Balay #undef __FUNC__ 6765615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 6778f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 678da3a660dSBarry Smith { 679416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 680da3a660dSBarry Smith int ierr; 681da3a660dSBarry Smith 6823a40ed3dSBarry Smith PetscFunctionBegin; 683da3a660dSBarry Smith /* do nondiagonal part */ 684f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 685da3a660dSBarry Smith /* send it on its way */ 686537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 687da3a660dSBarry Smith /* do local part */ 688f830108cSBarry Smith ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 689da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 690da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 691da3a660dSBarry Smith /* but is not perhaps always true. */ 692537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 6933a40ed3dSBarry Smith PetscFunctionReturn(0); 694da3a660dSBarry Smith } 695da3a660dSBarry Smith 6965615d1e5SSatish Balay #undef __FUNC__ 6975615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 6988f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 699da3a660dSBarry Smith { 700416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 701da3a660dSBarry Smith int ierr; 702da3a660dSBarry Smith 7033a40ed3dSBarry Smith PetscFunctionBegin; 704da3a660dSBarry Smith /* do nondiagonal part */ 705f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 706da3a660dSBarry Smith /* send it on its way */ 707537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 708da3a660dSBarry Smith /* do local part */ 709f830108cSBarry Smith ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 710da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 711da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 712da3a660dSBarry Smith /* but is not perhaps always true. */ 7130a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 7143a40ed3dSBarry Smith PetscFunctionReturn(0); 715da3a660dSBarry Smith } 716da3a660dSBarry Smith 7171eb62cbbSBarry Smith /* 7181eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 7191eb62cbbSBarry Smith diagonal block 7201eb62cbbSBarry Smith */ 7215615d1e5SSatish Balay #undef __FUNC__ 7225615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 7238f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 7241eb62cbbSBarry Smith { 7253a40ed3dSBarry Smith int ierr; 726416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 7273a40ed3dSBarry Smith 7283a40ed3dSBarry Smith PetscFunctionBegin; 729a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 7305baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 731a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 7323a40ed3dSBarry Smith } 7333a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 7343a40ed3dSBarry Smith PetscFunctionReturn(0); 7351eb62cbbSBarry Smith } 7361eb62cbbSBarry Smith 7375615d1e5SSatish Balay #undef __FUNC__ 7385615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 7398f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A) 740052efed2SBarry Smith { 741052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 742052efed2SBarry Smith int ierr; 7433a40ed3dSBarry Smith 7443a40ed3dSBarry Smith PetscFunctionBegin; 745052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 746052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 7473a40ed3dSBarry Smith PetscFunctionReturn(0); 748052efed2SBarry Smith } 749052efed2SBarry Smith 7505615d1e5SSatish Balay #undef __FUNC__ 751d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" 752e1311b90SBarry Smith int MatDestroy_MPIAIJ(Mat mat) 7531eb62cbbSBarry Smith { 75444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 7551eb62cbbSBarry Smith int ierr; 75683e2fdc7SBarry Smith 7573a40ed3dSBarry Smith PetscFunctionBegin; 7583a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 759e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 760a5a9c739SBarry Smith #endif 76183e2fdc7SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 7620452661fSBarry Smith PetscFree(aij->rowners); 76378b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 76478b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 7650452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 7660452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 7671eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 768a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 7697a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 7700452661fSBarry Smith PetscFree(aij); 771a5a9c739SBarry Smith PLogObjectDestroy(mat); 7720452661fSBarry Smith PetscHeaderDestroy(mat); 7733a40ed3dSBarry Smith PetscFunctionReturn(0); 7741eb62cbbSBarry Smith } 775ee50ffe9SBarry Smith 7765615d1e5SSatish Balay #undef __FUNC__ 777d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" 7788f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 7791eb62cbbSBarry Smith { 780416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 781416022c9SBarry Smith int ierr; 782416022c9SBarry Smith 7833a40ed3dSBarry Smith PetscFunctionBegin; 78417699dbbSLois Curfman McInnes if (aij->size == 1) { 785416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 786416022c9SBarry Smith } 787a8c6a408SBarry Smith else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 7883a40ed3dSBarry Smith PetscFunctionReturn(0); 789416022c9SBarry Smith } 790416022c9SBarry Smith 7915615d1e5SSatish Balay #undef __FUNC__ 792d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 7938f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 794416022c9SBarry Smith { 79544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 796dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 797a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 798d636dbe3SBarry Smith FILE *fd; 79919bcc07fSBarry Smith ViewerType vtype; 800416022c9SBarry Smith 8013a40ed3dSBarry Smith PetscFunctionBegin; 80219bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 80319bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 80490ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 8050a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 8064e220ebcSLois Curfman McInnes MatInfo info; 8074e220ebcSLois Curfman McInnes int flg; 808a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 80990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 8104e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 81195e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 81277c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 81395e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 8144e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 81595e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 8164e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 8174e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 8184e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 8194e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 8204e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 821a56f8943SBarry Smith fflush(fd); 82277c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 823a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 8243a40ed3dSBarry Smith PetscFunctionReturn(0); 8253a40ed3dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 8263a40ed3dSBarry Smith PetscFunctionReturn(0); 82708480c60SBarry Smith } 828416022c9SBarry Smith } 829416022c9SBarry Smith 83019bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 83119bcc07fSBarry Smith Draw draw; 83219bcc07fSBarry Smith PetscTruth isnull; 83319bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 8343a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 83519bcc07fSBarry Smith } 83619bcc07fSBarry Smith 83719bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 83890ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 83977c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 840d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 84117699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 8421eb62cbbSBarry Smith aij->cend); 84378b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 84478b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 845d13ab20cSBarry Smith fflush(fd); 84677c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 8473a40ed3dSBarry Smith } else { 848a56f8943SBarry Smith int size = aij->size; 849a56f8943SBarry Smith rank = aij->rank; 85017699dbbSLois Curfman McInnes if (size == 1) { 85178b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 8523a40ed3dSBarry Smith } else { 85395373324SBarry Smith /* assemble the entire matrix onto first processor. */ 85495373324SBarry Smith Mat A; 855ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 8562eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 85795373324SBarry Smith Scalar *a; 8582ee70a88SLois Curfman McInnes 85917699dbbSLois Curfman McInnes if (!rank) { 86055843e3eSBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 8613a40ed3dSBarry Smith } else { 86255843e3eSBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 86395373324SBarry Smith } 864464493b3SBarry Smith PLogObjectParent(mat,A); 865416022c9SBarry Smith 86695373324SBarry Smith /* copy over the A part */ 867ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 8682ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 86995373324SBarry Smith row = aij->rstart; 870dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 87195373324SBarry Smith for ( i=0; i<m; i++ ) { 872416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 87395373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 87495373324SBarry Smith } 8752ee70a88SLois Curfman McInnes aj = Aloc->j; 876dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 87795373324SBarry Smith 87895373324SBarry Smith /* copy over the B part */ 879ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 8802ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 88195373324SBarry Smith row = aij->rstart; 8820452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 883dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 88495373324SBarry Smith for ( i=0; i<m; i++ ) { 885416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 88695373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 88795373324SBarry Smith } 8880452661fSBarry Smith PetscFree(ct); 8896d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8906d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 89155843e3eSBarry Smith /* 89255843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 89355843e3eSBarry Smith synchronized across all processors that share the Draw object 89455843e3eSBarry Smith */ 89555843e3eSBarry Smith if (!rank || vtype == DRAW_VIEWER) { 89678b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 89795373324SBarry Smith } 89878b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 89995373324SBarry Smith } 90095373324SBarry Smith } 9013a40ed3dSBarry Smith PetscFunctionReturn(0); 9021eb62cbbSBarry Smith } 9031eb62cbbSBarry Smith 9045615d1e5SSatish Balay #undef __FUNC__ 905d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ" 906e1311b90SBarry Smith int MatView_MPIAIJ(Mat mat,Viewer viewer) 907416022c9SBarry Smith { 908416022c9SBarry Smith int ierr; 90919bcc07fSBarry Smith ViewerType vtype; 910416022c9SBarry Smith 9113a40ed3dSBarry Smith PetscFunctionBegin; 91219bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 91319bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 91419bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 915d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 9165cd90555SBarry Smith } else if (vtype == BINARY_FILE_VIEWER) { 9173a40ed3dSBarry Smith ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 9185cd90555SBarry Smith } else { 9195cd90555SBarry Smith SETERRQ(1,1,"Viewer type not supported by PETSc object"); 920416022c9SBarry Smith } 9213a40ed3dSBarry Smith PetscFunctionReturn(0); 922416022c9SBarry Smith } 923416022c9SBarry Smith 9241eb62cbbSBarry Smith /* 9251eb62cbbSBarry Smith This has to provide several versions. 9261eb62cbbSBarry Smith 9271eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 9281eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 929d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 9301eb62cbbSBarry Smith */ 9315615d1e5SSatish Balay #undef __FUNC__ 9325615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 9338f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 934dbb450caSBarry Smith double fshift,int its,Vec xx) 9358a729477SBarry Smith { 93644a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 937d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 938ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 939c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 9406abc6512SBarry Smith int ierr,*idx, *diag; 941416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 9428a729477SBarry Smith 9433a40ed3dSBarry Smith PetscFunctionBegin; 944d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 945dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 946dbb450caSBarry Smith ls = ls + shift; 94783e2fdc7SBarry Smith if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 948d6dfbf8fSBarry Smith diag = A->diag; 949c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 950da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 951f830108cSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 9523a40ed3dSBarry Smith PetscFunctionReturn(0); 953da3a660dSBarry Smith } 9543a40ed3dSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 9553a40ed3dSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 956d6dfbf8fSBarry Smith while (its--) { 957d6dfbf8fSBarry Smith /* go down through the rows */ 958d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 959d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 960dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 961dbb450caSBarry Smith v = A->a + A->i[i] + shift; 962d6dfbf8fSBarry Smith sum = b[i]; 963d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 964dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 965d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 966dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 967dbb450caSBarry Smith v = B->a + B->i[i] + shift; 968d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 96955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 970d6dfbf8fSBarry Smith } 971d6dfbf8fSBarry Smith /* come up through the rows */ 972d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 973d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 974dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 975dbb450caSBarry Smith v = A->a + A->i[i] + shift; 976d6dfbf8fSBarry Smith sum = b[i]; 977d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 978dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 979d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 980dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 981dbb450caSBarry Smith v = B->a + B->i[i] + shift; 982d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 98355a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 984d6dfbf8fSBarry Smith } 985d6dfbf8fSBarry Smith } 9863a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 987da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 988f830108cSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 9893a40ed3dSBarry Smith PetscFunctionReturn(0); 990da3a660dSBarry Smith } 9913a40ed3dSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 9923a40ed3dSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 993d6dfbf8fSBarry Smith while (its--) { 994d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 995d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 996dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 997dbb450caSBarry Smith v = A->a + A->i[i] + shift; 998d6dfbf8fSBarry Smith sum = b[i]; 999d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 1000dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1001d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1002dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1003dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1004d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 100555a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1006d6dfbf8fSBarry Smith } 1007d6dfbf8fSBarry Smith } 10083a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1009da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 1010f830108cSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 10113a40ed3dSBarry Smith PetscFunctionReturn(0); 1012da3a660dSBarry Smith } 101343a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 101478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 101543a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 101678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 1017d6dfbf8fSBarry Smith while (its--) { 1018d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 1019d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 1020dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 1021dbb450caSBarry Smith v = A->a + A->i[i] + shift; 1022d6dfbf8fSBarry Smith sum = b[i]; 1023d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 1024dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1025d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1026dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1027dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1028d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 102955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1030d6dfbf8fSBarry Smith } 1031d6dfbf8fSBarry Smith } 10323a40ed3dSBarry Smith } else { 1033a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1034c16cb8f2SBarry Smith } 10353a40ed3dSBarry Smith PetscFunctionReturn(0); 10368a729477SBarry Smith } 1037a66be287SLois Curfman McInnes 10385615d1e5SSatish Balay #undef __FUNC__ 1039d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" 10408f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1041a66be287SLois Curfman McInnes { 1042a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1043a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 10444e220ebcSLois Curfman McInnes int ierr; 10454e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 1046a66be287SLois Curfman McInnes 10473a40ed3dSBarry Smith PetscFunctionBegin; 10484e220ebcSLois Curfman McInnes info->block_size = 1.0; 10494e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 10504e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 10514e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 10524e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 10534e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 10544e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 1055a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 10564e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 10574e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 10584e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 10594e220ebcSLois Curfman McInnes info->memory = isend[3]; 10604e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1061a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1062ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 10634e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10644e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10654e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10664e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10674e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1068a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1069ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 10704e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10714e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10724e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10734e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10744e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1075a66be287SLois Curfman McInnes } 10764e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 10774e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 10784e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 10798d700155SBarry Smith info->rows_global = (double)mat->M; 10808d700155SBarry Smith info->columns_global = (double)mat->N; 10818d700155SBarry Smith info->rows_local = (double)mat->m; 10828d700155SBarry Smith info->columns_local = (double)mat->N; 10834e220ebcSLois Curfman McInnes 10843a40ed3dSBarry Smith PetscFunctionReturn(0); 1085a66be287SLois Curfman McInnes } 1086a66be287SLois Curfman McInnes 10875615d1e5SSatish Balay #undef __FUNC__ 1088d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" 10898f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op) 1090c74985f6SBarry Smith { 1091c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1092c74985f6SBarry Smith 10933a40ed3dSBarry Smith PetscFunctionBegin; 10946d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10956d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10966da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1097c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 109896854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1099c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1100b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1101b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1102b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1103aeafbbfcSLois Curfman McInnes a->roworiented = 1; 1104c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1105c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1106b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 11076da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 11086d4a8577SBarry Smith op == MAT_SYMMETRIC || 11096d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 11106d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 1111981c4779SBarry Smith PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 11126d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 11134b0e389bSBarry Smith a->roworiented = 0; 11144b0e389bSBarry Smith MatSetOption(a->A,op); 11154b0e389bSBarry Smith MatSetOption(a->B,op); 11162b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 111790f02eecSBarry Smith a->donotstash = 1; 11183a40ed3dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS){ 11193a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 11203a40ed3dSBarry Smith } else { 11213a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 11223a40ed3dSBarry Smith } 11233a40ed3dSBarry Smith PetscFunctionReturn(0); 1124c74985f6SBarry Smith } 1125c74985f6SBarry Smith 11265615d1e5SSatish Balay #undef __FUNC__ 1127d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" 11288f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1129c74985f6SBarry Smith { 113044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 11313a40ed3dSBarry Smith 11323a40ed3dSBarry Smith PetscFunctionBegin; 11330752156aSBarry Smith if (m) *m = mat->M; 11340752156aSBarry Smith if (n) *n = mat->N; 11353a40ed3dSBarry Smith PetscFunctionReturn(0); 1136c74985f6SBarry Smith } 1137c74985f6SBarry Smith 11385615d1e5SSatish Balay #undef __FUNC__ 1139d4bb536fSBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" 11408f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1141c74985f6SBarry Smith { 114244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 11433a40ed3dSBarry Smith 11443a40ed3dSBarry Smith PetscFunctionBegin; 11450752156aSBarry Smith if (m) *m = mat->m; 1146f830108cSBarry Smith if (n) *n = mat->n; 11473a40ed3dSBarry Smith PetscFunctionReturn(0); 1148c74985f6SBarry Smith } 1149c74985f6SBarry Smith 11505615d1e5SSatish Balay #undef __FUNC__ 1151d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 11528f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1153c74985f6SBarry Smith { 115444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 11553a40ed3dSBarry Smith 11563a40ed3dSBarry Smith PetscFunctionBegin; 1157c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 11583a40ed3dSBarry Smith PetscFunctionReturn(0); 1159c74985f6SBarry Smith } 1160c74985f6SBarry Smith 11616d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11626d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11636d84be18SBarry Smith 11645615d1e5SSatish Balay #undef __FUNC__ 11655615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 11666d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 116739e00950SLois Curfman McInnes { 1168154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 116970f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1170154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1171154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 117270f0671dSBarry Smith int *cmap, *idx_p; 117339e00950SLois Curfman McInnes 11743a40ed3dSBarry Smith PetscFunctionBegin; 1175a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 11767a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 11777a0afa10SBarry Smith 117870f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 11797a0afa10SBarry Smith /* 11807a0afa10SBarry Smith allocate enough space to hold information from the longest row. 11817a0afa10SBarry Smith */ 11827a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1183c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1184c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 11857a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 11867a0afa10SBarry Smith if (max < tmp) { max = tmp; } 11877a0afa10SBarry Smith } 11887a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 11897a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 11907a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 11917a0afa10SBarry Smith } 11927a0afa10SBarry Smith 1193a8c6a408SBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1194abc0e9e4SLois Curfman McInnes lrow = row - rstart; 119539e00950SLois Curfman McInnes 1196154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1197154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1198154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 1199f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1200f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1201154123eaSLois Curfman McInnes nztot = nzA + nzB; 1202154123eaSLois Curfman McInnes 120370f0671dSBarry Smith cmap = mat->garray; 1204154123eaSLois Curfman McInnes if (v || idx) { 1205154123eaSLois Curfman McInnes if (nztot) { 1206154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 120770f0671dSBarry Smith int imark = -1; 1208154123eaSLois Curfman McInnes if (v) { 120970f0671dSBarry Smith *v = v_p = mat->rowvalues; 121039e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 121170f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1212154123eaSLois Curfman McInnes else break; 1213154123eaSLois Curfman McInnes } 1214154123eaSLois Curfman McInnes imark = i; 121570f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 121670f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1217154123eaSLois Curfman McInnes } 1218154123eaSLois Curfman McInnes if (idx) { 121970f0671dSBarry Smith *idx = idx_p = mat->rowindices; 122070f0671dSBarry Smith if (imark > -1) { 122170f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 122270f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 122370f0671dSBarry Smith } 122470f0671dSBarry Smith } else { 1225154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 122670f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1227154123eaSLois Curfman McInnes else break; 1228154123eaSLois Curfman McInnes } 1229154123eaSLois Curfman McInnes imark = i; 123070f0671dSBarry Smith } 123170f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 123270f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 123339e00950SLois Curfman McInnes } 123439e00950SLois Curfman McInnes } 12351ca473b0SSatish Balay else { 12361ca473b0SSatish Balay if (idx) *idx = 0; 12371ca473b0SSatish Balay if (v) *v = 0; 12381ca473b0SSatish Balay } 1239154123eaSLois Curfman McInnes } 124039e00950SLois Curfman McInnes *nz = nztot; 1241f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1242f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 12433a40ed3dSBarry Smith PetscFunctionReturn(0); 124439e00950SLois Curfman McInnes } 124539e00950SLois Curfman McInnes 12465615d1e5SSatish Balay #undef __FUNC__ 1247d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" 12486d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 124939e00950SLois Curfman McInnes { 12507a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 12513a40ed3dSBarry Smith 12523a40ed3dSBarry Smith PetscFunctionBegin; 12537a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1254a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 12557a0afa10SBarry Smith } 12567a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 12573a40ed3dSBarry Smith PetscFunctionReturn(0); 125839e00950SLois Curfman McInnes } 125939e00950SLois Curfman McInnes 12605615d1e5SSatish Balay #undef __FUNC__ 12615615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 12628f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1263855ac2c5SLois Curfman McInnes { 1264855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1265ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1266416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1267416022c9SBarry Smith double sum = 0.0; 126804ca555eSLois Curfman McInnes Scalar *v; 126904ca555eSLois Curfman McInnes 12703a40ed3dSBarry Smith PetscFunctionBegin; 127117699dbbSLois Curfman McInnes if (aij->size == 1) { 127214183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 127337fa93a5SLois Curfman McInnes } else { 127404ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 127504ca555eSLois Curfman McInnes v = amat->a; 127604ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 12773a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 127804ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 127904ca555eSLois Curfman McInnes #else 128004ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 128104ca555eSLois Curfman McInnes #endif 128204ca555eSLois Curfman McInnes } 128304ca555eSLois Curfman McInnes v = bmat->a; 128404ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 12853a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 128604ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 128704ca555eSLois Curfman McInnes #else 128804ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 128904ca555eSLois Curfman McInnes #endif 129004ca555eSLois Curfman McInnes } 1291ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 129204ca555eSLois Curfman McInnes *norm = sqrt(*norm); 12933a40ed3dSBarry Smith } else if (type == NORM_1) { /* max column norm */ 129404ca555eSLois Curfman McInnes double *tmp, *tmp2; 129504ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 1296758f045eSSatish Balay tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1297758f045eSSatish Balay tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1298cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 129904ca555eSLois Curfman McInnes *norm = 0.0; 130004ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 130104ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1302579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 130304ca555eSLois Curfman McInnes } 130404ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 130504ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1306579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 130704ca555eSLois Curfman McInnes } 1308ca161407SBarry Smith ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 130904ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 131004ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 131104ca555eSLois Curfman McInnes } 13120452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 13133a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 1314515d9167SLois Curfman McInnes double ntemp = 0.0; 131504ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1316dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 131704ca555eSLois Curfman McInnes sum = 0.0; 131804ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1319cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 132004ca555eSLois Curfman McInnes } 1321dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 132204ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1323cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 132404ca555eSLois Curfman McInnes } 1325515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 132604ca555eSLois Curfman McInnes } 1327ca161407SBarry Smith ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1328ca161407SBarry Smith } else { 1329a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 133004ca555eSLois Curfman McInnes } 133137fa93a5SLois Curfman McInnes } 13323a40ed3dSBarry Smith PetscFunctionReturn(0); 1333855ac2c5SLois Curfman McInnes } 1334855ac2c5SLois Curfman McInnes 13355615d1e5SSatish Balay #undef __FUNC__ 13365615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 13378f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1338b7c46309SBarry Smith { 1339b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1340dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1341416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1342b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 13433a40ed3dSBarry Smith Mat B; 1344b7c46309SBarry Smith Scalar *array; 1345b7c46309SBarry Smith 13463a40ed3dSBarry Smith PetscFunctionBegin; 1347d4bb536fSBarry Smith if (matout == PETSC_NULL && M != N) { 1348a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1349d4bb536fSBarry Smith } 1350d4bb536fSBarry Smith 1351d4bb536fSBarry Smith ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1352b7c46309SBarry Smith 1353b7c46309SBarry Smith /* copy over the A part */ 1354ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1355b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1356b7c46309SBarry Smith row = a->rstart; 1357dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1358b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1359416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1360b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1361b7c46309SBarry Smith } 1362b7c46309SBarry Smith aj = Aloc->j; 13634af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1364b7c46309SBarry Smith 1365b7c46309SBarry Smith /* copy over the B part */ 1366ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1367b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1368b7c46309SBarry Smith row = a->rstart; 13690452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1370dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1371b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1372416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1373b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1374b7c46309SBarry Smith } 13750452661fSBarry Smith PetscFree(ct); 13766d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13776d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13783638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 13790de55854SLois Curfman McInnes *matout = B; 13800de55854SLois Curfman McInnes } else { 1381f830108cSBarry Smith PetscOps *Abops; 1382f830108cSBarry Smith struct _MatOps *Aops; 1383f830108cSBarry Smith 13840de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 13850452661fSBarry Smith PetscFree(a->rowners); 13860de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 13870de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 13880452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 13890452661fSBarry Smith if (a->garray) PetscFree(a->garray); 13900de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1391a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 13920452661fSBarry Smith PetscFree(a); 1393f830108cSBarry Smith 1394f830108cSBarry Smith /* 1395f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1396f830108cSBarry Smith A pointers for the bops and ops but copy everything 1397f830108cSBarry Smith else from C. 1398f830108cSBarry Smith */ 1399f830108cSBarry Smith Abops = A->bops; 1400f830108cSBarry Smith Aops = A->ops; 1401f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1402f830108cSBarry Smith A->bops = Abops; 1403f830108cSBarry Smith A->ops = Aops; 14040452661fSBarry Smith PetscHeaderDestroy(B); 14050de55854SLois Curfman McInnes } 14063a40ed3dSBarry Smith PetscFunctionReturn(0); 1407b7c46309SBarry Smith } 1408b7c46309SBarry Smith 14095615d1e5SSatish Balay #undef __FUNC__ 14105615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 14114b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1412a008b906SSatish Balay { 14134b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 14144b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1415a008b906SSatish Balay int ierr,s1,s2,s3; 1416a008b906SSatish Balay 14173a40ed3dSBarry Smith PetscFunctionBegin; 14184b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 14194b967eb1SSatish Balay if (rr) { 1420e1311b90SBarry Smith ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1421a8c6a408SBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 14224b967eb1SSatish Balay /* Overlap communication with computation. */ 142343a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1424a008b906SSatish Balay } 14254b967eb1SSatish Balay if (ll) { 1426e1311b90SBarry Smith ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1427a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1428f830108cSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 14294b967eb1SSatish Balay } 14304b967eb1SSatish Balay /* scale the diagonal block */ 1431f830108cSBarry Smith ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 14324b967eb1SSatish Balay 14334b967eb1SSatish Balay if (rr) { 14344b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 143543a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1436f830108cSBarry Smith ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 14374b967eb1SSatish Balay } 14384b967eb1SSatish Balay 14393a40ed3dSBarry Smith PetscFunctionReturn(0); 1440a008b906SSatish Balay } 1441a008b906SSatish Balay 1442a008b906SSatish Balay 1443682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 14445615d1e5SSatish Balay #undef __FUNC__ 1445d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" 14468f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A) 1447682d7d0cSBarry Smith { 1448682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 14493a40ed3dSBarry Smith int ierr; 1450682d7d0cSBarry Smith 14513a40ed3dSBarry Smith PetscFunctionBegin; 14523a40ed3dSBarry Smith if (!a->rank) { 14533a40ed3dSBarry Smith ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 14543a40ed3dSBarry Smith } 14553a40ed3dSBarry Smith PetscFunctionReturn(0); 1456682d7d0cSBarry Smith } 1457682d7d0cSBarry Smith 14585615d1e5SSatish Balay #undef __FUNC__ 1459d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" 14608f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 14615a838052SSatish Balay { 14623a40ed3dSBarry Smith PetscFunctionBegin; 14635a838052SSatish Balay *bs = 1; 14643a40ed3dSBarry Smith PetscFunctionReturn(0); 14655a838052SSatish Balay } 14665615d1e5SSatish Balay #undef __FUNC__ 1467d4bb536fSBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" 14688f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A) 1469bb5a7306SBarry Smith { 1470bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1471bb5a7306SBarry Smith int ierr; 14723a40ed3dSBarry Smith 14733a40ed3dSBarry Smith PetscFunctionBegin; 1474bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 14753a40ed3dSBarry Smith PetscFunctionReturn(0); 1476bb5a7306SBarry Smith } 1477bb5a7306SBarry Smith 1478d4bb536fSBarry Smith #undef __FUNC__ 1479d4bb536fSBarry Smith #define __FUNC__ "MatEqual_MPIAIJ" 1480d4bb536fSBarry Smith int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1481d4bb536fSBarry Smith { 1482d4bb536fSBarry Smith Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1483d4bb536fSBarry Smith Mat a, b, c, d; 1484d4bb536fSBarry Smith PetscTruth flg; 1485d4bb536fSBarry Smith int ierr; 1486d4bb536fSBarry Smith 14873a40ed3dSBarry Smith PetscFunctionBegin; 1488a8c6a408SBarry Smith if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1489d4bb536fSBarry Smith a = matA->A; b = matA->B; 1490d4bb536fSBarry Smith c = matB->A; d = matB->B; 1491d4bb536fSBarry Smith 1492d4bb536fSBarry Smith ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1493d4bb536fSBarry Smith if (flg == PETSC_TRUE) { 1494d4bb536fSBarry Smith ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1495d4bb536fSBarry Smith } 1496ca161407SBarry Smith ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 14973a40ed3dSBarry Smith PetscFunctionReturn(0); 1498d4bb536fSBarry Smith } 1499d4bb536fSBarry Smith 15008f6be9afSLois Curfman McInnes extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 15012f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 15020a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 15030a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 15046a6a5d1dSBarry Smith extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *); 150500e6dbe6SBarry Smith 15068a729477SBarry Smith /* -------------------------------------------------------------------*/ 15072ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 150839e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 150944a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 151044a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 151136ce4990SBarry Smith 0,0, 151236ce4990SBarry Smith 0,0, 151336ce4990SBarry Smith 0,0, 151444a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1515b7c46309SBarry Smith MatTranspose_MPIAIJ, 1516d4bb536fSBarry Smith MatGetInfo_MPIAIJ,MatEqual_MPIAIJ, 1517a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1518ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 15191eb62cbbSBarry Smith 0, 1520299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 152136ce4990SBarry Smith 0,0,0,0, 1522d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 152336ce4990SBarry Smith 0,0, 152494a9d846SBarry Smith 0,0,MatConvertSameType_MPIAIJ,0,0, 1525b49de8d1SLois Curfman McInnes 0,0,0, 1526598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1527052efed2SBarry Smith MatPrintHelp_MPIAIJ, 15283b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 15290a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 153000e6dbe6SBarry Smith MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ, 1531ca161407SBarry Smith 0,0,MatGetSubMatrix_MPIAIJ}; 153236ce4990SBarry Smith 15338a729477SBarry Smith 15345615d1e5SSatish Balay #undef __FUNC__ 15355615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 15361987afe7SBarry Smith /*@C 1537ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 15383a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 15393a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 15403a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 15413a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 15428a729477SBarry Smith 1543db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1544db81eaa0SLois Curfman McInnes 15458a729477SBarry Smith Input Parameters: 1546db81eaa0SLois Curfman McInnes + comm - MPI communicator 15477d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 154892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 154992e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 15501a3896d6SBarry Smith . n - This value should be the same as the local size used in creating the 15511a3896d6SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 15521a3896d6SBarry Smith calculated if N is given) For square matrices n is almost always m. 15537d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 155492e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1555ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1556ff756334SLois Curfman McInnes (same for all local rows) 15572bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 155892e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 15592bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 15602bd5e0b2SLois Curfman McInnes it is zero. 15612bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1562ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1563db81eaa0SLois Curfman McInnes - o_nzz - array containing the number of nonzeros in the various rows of the 15642bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 15652bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 15668a729477SBarry Smith 1567ff756334SLois Curfman McInnes Output Parameter: 156844cd7ae7SLois Curfman McInnes . A - the matrix 15698a729477SBarry Smith 1570b259b22eSLois Curfman McInnes Notes: 1571ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1572ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 15730002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 15740002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1575ff756334SLois Curfman McInnes 1576ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1577ff756334SLois Curfman McInnes (possibly both). 1578ff756334SLois Curfman McInnes 15795511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 15805511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 15815511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 15825511cfe3SLois Curfman McInnes 15835511cfe3SLois Curfman McInnes Options Database Keys: 1584db81eaa0SLois Curfman McInnes + -mat_aij_no_inode - Do not use inodes 1585db81eaa0SLois Curfman McInnes . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1586db81eaa0SLois Curfman McInnes - -mat_aij_oneindex - Internally use indexing starting at 1 1587db81eaa0SLois Curfman McInnes rather than 0. Note that when calling MatSetValues(), 1588db81eaa0SLois Curfman McInnes the user still MUST index entries starting at 0! 15895511cfe3SLois Curfman McInnes 1590e0245417SLois Curfman McInnes Storage Information: 1591e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1592e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1593e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1594e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1595e0245417SLois Curfman McInnes 1596e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 15975ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 15985ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 15995ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 16005ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1601ff756334SLois Curfman McInnes 16025511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 16035511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 16042191d07cSBarry Smith 1605db81eaa0SLois Curfman McInnes .vb 1606db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 1607db81eaa0SLois Curfman McInnes ------------------- 1608db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 1609db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 1610db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 1611db81eaa0SLois Curfman McInnes ------------------- 1612db81eaa0SLois Curfman McInnes .ve 1613b810aeb4SBarry Smith 16145511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 16155511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 16165511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 16175511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 16185511cfe3SLois Curfman McInnes 16195511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 16205511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 16215511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 16223d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 162392e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 16246da5968aSLois Curfman McInnes matrices. 16253a511b96SLois Curfman McInnes 1626dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1627ff756334SLois Curfman McInnes 1628fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 16298a729477SBarry Smith @*/ 1630e1311b90SBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 16318a729477SBarry Smith { 163244cd7ae7SLois Curfman McInnes Mat B; 163344cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 163436ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1635416022c9SBarry Smith 16363a40ed3dSBarry Smith PetscFunctionBegin; 16373914022bSBarry Smith MPI_Comm_size(comm,&size); 16383914022bSBarry Smith if (size == 1) { 16393914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 16403914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 16413914022bSBarry Smith ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 16423a40ed3dSBarry Smith PetscFunctionReturn(0); 16433914022bSBarry Smith } 16443914022bSBarry Smith 164544cd7ae7SLois Curfman McInnes *A = 0; 1646f830108cSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 164744cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 164844cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 164944cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1650f830108cSBarry Smith PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1651e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIAIJ; 1652e1311b90SBarry Smith B->ops->view = MatView_MPIAIJ; 165344cd7ae7SLois Curfman McInnes B->factor = 0; 165444cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 165590f02eecSBarry Smith B->mapping = 0; 1656d6dfbf8fSBarry Smith 165747794344SBarry Smith B->insertmode = NOT_SET_VALUES; 16589eb4d147SSatish Balay b->size = size; 165944cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 16601eb62cbbSBarry Smith 16613a40ed3dSBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1662a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 16633a40ed3dSBarry Smith } 16641987afe7SBarry Smith 1665dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 16661eb62cbbSBarry Smith work[0] = m; work[1] = n; 1667ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1668dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1669dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 16701eb62cbbSBarry Smith } 167144cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 167244cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 167344cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 167444cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 167544cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 167644cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 16771eb62cbbSBarry Smith 16781eb62cbbSBarry Smith /* build local table of row and column ownerships */ 167944cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1680f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1681603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 1682ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 168344cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 168444cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 168544cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 16868a729477SBarry Smith } 168744cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 168844cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 1689ca161407SBarry Smith ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 169044cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 169144cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 169244cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 16938a729477SBarry Smith } 169444cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 169544cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 16968a729477SBarry Smith 16975ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1698029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 169944cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 17007b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1701029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 170244cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 17038a729477SBarry Smith 17041eb62cbbSBarry Smith /* build cache for off array entries formed */ 170544cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 170690f02eecSBarry Smith b->donotstash = 0; 170744cd7ae7SLois Curfman McInnes b->colmap = 0; 170844cd7ae7SLois Curfman McInnes b->garray = 0; 170944cd7ae7SLois Curfman McInnes b->roworiented = 1; 17108a729477SBarry Smith 17111eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 171244cd7ae7SLois Curfman McInnes b->lvec = 0; 171344cd7ae7SLois Curfman McInnes b->Mvctx = 0; 17148a729477SBarry Smith 17157a0afa10SBarry Smith /* stuff for MatGetRow() */ 171644cd7ae7SLois Curfman McInnes b->rowindices = 0; 171744cd7ae7SLois Curfman McInnes b->rowvalues = 0; 171844cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 17197a0afa10SBarry Smith 172044cd7ae7SLois Curfman McInnes *A = B; 17213a40ed3dSBarry Smith PetscFunctionReturn(0); 1722d6dfbf8fSBarry Smith } 1723c74985f6SBarry Smith 17245615d1e5SSatish Balay #undef __FUNC__ 17255615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ" 17268f6be9afSLois Curfman McInnes int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1727d6dfbf8fSBarry Smith { 1728d6dfbf8fSBarry Smith Mat mat; 1729416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1730a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1731d6dfbf8fSBarry Smith 17323a40ed3dSBarry Smith PetscFunctionBegin; 1733416022c9SBarry Smith *newmat = 0; 1734f830108cSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1735a5a9c739SBarry Smith PLogObjectCreate(mat); 17360452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1737f830108cSBarry Smith PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 1738e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIAIJ; 1739e1311b90SBarry Smith mat->ops->view = MatView_MPIAIJ; 1740d6dfbf8fSBarry Smith mat->factor = matin->factor; 1741c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1742d6dfbf8fSBarry Smith 174344cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 174444cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 174544cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 174644cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1747d6dfbf8fSBarry Smith 1748416022c9SBarry Smith a->rstart = oldmat->rstart; 1749416022c9SBarry Smith a->rend = oldmat->rend; 1750416022c9SBarry Smith a->cstart = oldmat->cstart; 1751416022c9SBarry Smith a->cend = oldmat->cend; 175217699dbbSLois Curfman McInnes a->size = oldmat->size; 175317699dbbSLois Curfman McInnes a->rank = oldmat->rank; 175447794344SBarry Smith mat->insertmode = NOT_SET_VALUES; 1755bcd2baecSBarry Smith a->rowvalues = 0; 1756bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1757d6dfbf8fSBarry Smith 1758603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1759f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1760603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1761603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1762416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 17632ee70a88SLois Curfman McInnes if (oldmat->colmap) { 17640452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1765416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1766416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1767416022c9SBarry Smith } else a->colmap = 0; 17683f41c07dSBarry Smith if (oldmat->garray) { 17693f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 17703f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1771464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 17723f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1773416022c9SBarry Smith } else a->garray = 0; 1774d6dfbf8fSBarry Smith 1775416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1776416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1777a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1778416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1779416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1780416022c9SBarry Smith PLogObjectParent(mat,a->A); 1781416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1782416022c9SBarry Smith PLogObjectParent(mat,a->B); 17835dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 178425cdf11fSBarry Smith if (flg) { 1785682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1786682d7d0cSBarry Smith } 17878a729477SBarry Smith *newmat = mat; 17883a40ed3dSBarry Smith PetscFunctionReturn(0); 17898a729477SBarry Smith } 1790416022c9SBarry Smith 179177c4ece6SBarry Smith #include "sys.h" 1792416022c9SBarry Smith 17935615d1e5SSatish Balay #undef __FUNC__ 17945615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 179519bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1796416022c9SBarry Smith { 1797d65a2f8fSBarry Smith Mat A; 1798d65a2f8fSBarry Smith Scalar *vals,*svals; 179919bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1800416022c9SBarry Smith MPI_Status status; 18013a40ed3dSBarry Smith int i, nz, ierr, j,rstart, rend, fd; 180217699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1803d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 180419bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1805416022c9SBarry Smith 18063a40ed3dSBarry Smith PetscFunctionBegin; 180717699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 180817699dbbSLois Curfman McInnes if (!rank) { 180990ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 18100752156aSBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1811a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1812d64ed03dSBarry Smith if (header[3] < 0) { 1813a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1814d64ed03dSBarry Smith } 18156c5fab8fSBarry Smith } 18166c5fab8fSBarry Smith 1817d64ed03dSBarry Smith 1818ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1819416022c9SBarry Smith M = header[1]; N = header[2]; 1820416022c9SBarry Smith /* determine ownership of all rows */ 182117699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 18220452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1823ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1824416022c9SBarry Smith rowners[0] = 0; 182517699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1826416022c9SBarry Smith rowners[i] += rowners[i-1]; 1827416022c9SBarry Smith } 182817699dbbSLois Curfman McInnes rstart = rowners[rank]; 182917699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1830416022c9SBarry Smith 1831416022c9SBarry Smith /* distribute row lengths to all processors */ 18320452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1833416022c9SBarry Smith offlens = ourlens + (rend-rstart); 183417699dbbSLois Curfman McInnes if (!rank) { 18350452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 18360752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 18370452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 183817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1839ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 18400452661fSBarry Smith PetscFree(sndcounts); 18413a40ed3dSBarry Smith } else { 1842ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1843416022c9SBarry Smith } 1844416022c9SBarry Smith 184517699dbbSLois Curfman McInnes if (!rank) { 1846416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 18470452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1848cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 184917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1850416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1851416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1852416022c9SBarry Smith } 1853416022c9SBarry Smith } 18540452661fSBarry Smith PetscFree(rowlengths); 1855416022c9SBarry Smith 1856416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1857416022c9SBarry Smith maxnz = 0; 185817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 18590452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1860416022c9SBarry Smith } 18610452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1862416022c9SBarry Smith 1863416022c9SBarry Smith /* read in my part of the matrix column indices */ 1864416022c9SBarry Smith nz = procsnz[0]; 18650452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 18660752156aSBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1867d65a2f8fSBarry Smith 1868d65a2f8fSBarry Smith /* read in every one elses and ship off */ 186917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1870d65a2f8fSBarry Smith nz = procsnz[i]; 18710752156aSBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1872ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1873d65a2f8fSBarry Smith } 18740452661fSBarry Smith PetscFree(cols); 18753a40ed3dSBarry Smith } else { 1876416022c9SBarry Smith /* determine buffer space needed for message */ 1877416022c9SBarry Smith nz = 0; 1878416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1879416022c9SBarry Smith nz += ourlens[i]; 1880416022c9SBarry Smith } 18810452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1882416022c9SBarry Smith 1883416022c9SBarry Smith /* receive message of column indices*/ 1884ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1885ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1886a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1887416022c9SBarry Smith } 1888416022c9SBarry Smith 1889416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1890cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1891416022c9SBarry Smith jj = 0; 1892416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1893416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1894d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1895416022c9SBarry Smith jj++; 1896416022c9SBarry Smith } 1897416022c9SBarry Smith } 1898d65a2f8fSBarry Smith 1899d65a2f8fSBarry Smith /* create our matrix */ 1900416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1901416022c9SBarry Smith ourlens[i] -= offlens[i]; 1902416022c9SBarry Smith } 1903d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1904d65a2f8fSBarry Smith A = *newmat; 19056d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1906d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1907d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1908d65a2f8fSBarry Smith } 1909416022c9SBarry Smith 191017699dbbSLois Curfman McInnes if (!rank) { 19110452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1912416022c9SBarry Smith 1913416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1914416022c9SBarry Smith nz = procsnz[0]; 19150752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1916d65a2f8fSBarry Smith 1917d65a2f8fSBarry Smith /* insert into matrix */ 1918d65a2f8fSBarry Smith jj = rstart; 1919d65a2f8fSBarry Smith smycols = mycols; 1920d65a2f8fSBarry Smith svals = vals; 1921d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1922d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1923d65a2f8fSBarry Smith smycols += ourlens[i]; 1924d65a2f8fSBarry Smith svals += ourlens[i]; 1925d65a2f8fSBarry Smith jj++; 1926416022c9SBarry Smith } 1927416022c9SBarry Smith 1928d65a2f8fSBarry Smith /* read in other processors and ship out */ 192917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1930416022c9SBarry Smith nz = procsnz[i]; 19310752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1932ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1933416022c9SBarry Smith } 19340452661fSBarry Smith PetscFree(procsnz); 19353a40ed3dSBarry Smith } else { 1936d65a2f8fSBarry Smith /* receive numeric values */ 19370452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1938416022c9SBarry Smith 1939d65a2f8fSBarry Smith /* receive message of values*/ 1940ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1941ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1942a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1943d65a2f8fSBarry Smith 1944d65a2f8fSBarry Smith /* insert into matrix */ 1945d65a2f8fSBarry Smith jj = rstart; 1946d65a2f8fSBarry Smith smycols = mycols; 1947d65a2f8fSBarry Smith svals = vals; 1948d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1949d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1950d65a2f8fSBarry Smith smycols += ourlens[i]; 1951d65a2f8fSBarry Smith svals += ourlens[i]; 1952d65a2f8fSBarry Smith jj++; 1953d65a2f8fSBarry Smith } 1954d65a2f8fSBarry Smith } 19550452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1956d65a2f8fSBarry Smith 19576d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 19586d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 19593a40ed3dSBarry Smith PetscFunctionReturn(0); 1960416022c9SBarry Smith } 1961a0ff6018SBarry Smith 196229da9460SBarry Smith #undef __FUNC__ 196329da9460SBarry Smith #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 1964a0ff6018SBarry Smith /* 196529da9460SBarry Smith Not great since it makes two copies of the submatrix, first an SeqAIJ 196629da9460SBarry Smith in local and then by concatenating the local matrices the end result. 196729da9460SBarry Smith Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1968a0ff6018SBarry Smith */ 19696a6a5d1dSBarry Smith int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 1970a0ff6018SBarry Smith { 197100e6dbe6SBarry Smith int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1972fee21e36SBarry Smith Mat *local,M, Mreuse; 197300e6dbe6SBarry Smith Scalar *vwork,*aa; 197400e6dbe6SBarry Smith MPI_Comm comm = mat->comm; 197500e6dbe6SBarry Smith Mat_SeqAIJ *aij; 197600e6dbe6SBarry Smith int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 1977a0ff6018SBarry Smith 1978a0ff6018SBarry Smith PetscFunctionBegin; 197900e6dbe6SBarry Smith MPI_Comm_rank(comm,&rank); 198000e6dbe6SBarry Smith MPI_Comm_size(comm,&size); 198100e6dbe6SBarry Smith 1982fee21e36SBarry Smith if (call == MAT_REUSE_MATRIX) { 1983fee21e36SBarry Smith ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1984fee21e36SBarry Smith if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 1985fee21e36SBarry Smith local = &Mreuse; 1986fee21e36SBarry Smith ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1987fee21e36SBarry Smith } else { 1988a0ff6018SBarry Smith ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1989fee21e36SBarry Smith Mreuse = *local; 1990fee21e36SBarry Smith PetscFree(local); 1991fee21e36SBarry Smith } 1992a0ff6018SBarry Smith 1993a0ff6018SBarry Smith /* 1994a0ff6018SBarry Smith m - number of local rows 1995a0ff6018SBarry Smith n - number of columns (same on all processors) 1996a0ff6018SBarry Smith rstart - first row in new global matrix generated 1997a0ff6018SBarry Smith */ 1998fee21e36SBarry Smith ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1999a0ff6018SBarry Smith if (call == MAT_INITIAL_MATRIX) { 2000fee21e36SBarry Smith aij = (Mat_SeqAIJ *) (Mreuse)->data; 2001a8c6a408SBarry Smith if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 200200e6dbe6SBarry Smith ii = aij->i; 200300e6dbe6SBarry Smith jj = aij->j; 200400e6dbe6SBarry Smith 2005a0ff6018SBarry Smith /* 200600e6dbe6SBarry Smith Determine the number of non-zeros in the diagonal and off-diagonal 200700e6dbe6SBarry Smith portions of the matrix in order to do correct preallocation 2008a0ff6018SBarry Smith */ 200900e6dbe6SBarry Smith 201000e6dbe6SBarry Smith /* first get start and end of "diagonal" columns */ 20116a6a5d1dSBarry Smith if (csize == PETSC_DECIDE) { 201200e6dbe6SBarry Smith nlocal = n/size + ((n % size) > rank); 20136a6a5d1dSBarry Smith } else { 20146a6a5d1dSBarry Smith nlocal = csize; 20156a6a5d1dSBarry Smith } 2016ca161407SBarry Smith ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 201700e6dbe6SBarry Smith rstart = rend - nlocal; 20186a6a5d1dSBarry Smith if (rank == size - 1 && rend != n) { 20196a6a5d1dSBarry Smith SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 20206a6a5d1dSBarry Smith } 202100e6dbe6SBarry Smith 202200e6dbe6SBarry Smith /* next, compute all the lengths */ 202300e6dbe6SBarry Smith dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 202400e6dbe6SBarry Smith olens = dlens + m; 202500e6dbe6SBarry Smith for ( i=0; i<m; i++ ) { 202600e6dbe6SBarry Smith jend = ii[i+1] - ii[i]; 202700e6dbe6SBarry Smith olen = 0; 202800e6dbe6SBarry Smith dlen = 0; 202900e6dbe6SBarry Smith for ( j=0; j<jend; j++ ) { 203000e6dbe6SBarry Smith if ( *jj < rstart || *jj >= rend) olen++; 203100e6dbe6SBarry Smith else dlen++; 203200e6dbe6SBarry Smith jj++; 203300e6dbe6SBarry Smith } 203400e6dbe6SBarry Smith olens[i] = olen; 203500e6dbe6SBarry Smith dlens[i] = dlen; 203600e6dbe6SBarry Smith } 203700e6dbe6SBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 203800e6dbe6SBarry Smith PetscFree(dlens); 2039a0ff6018SBarry Smith } else { 2040a0ff6018SBarry Smith int ml,nl; 2041a0ff6018SBarry Smith 2042a0ff6018SBarry Smith M = *newmat; 2043a0ff6018SBarry Smith ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2044a8c6a408SBarry Smith if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2045a0ff6018SBarry Smith ierr = MatZeroEntries(M);CHKERRQ(ierr); 2046c48de900SBarry Smith /* 2047c48de900SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2048c48de900SBarry Smith rather than the slower MatSetValues(). 2049c48de900SBarry Smith */ 2050c48de900SBarry Smith M->was_assembled = PETSC_TRUE; 2051c48de900SBarry Smith M->assembled = PETSC_FALSE; 2052a0ff6018SBarry Smith } 2053a0ff6018SBarry Smith ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2054fee21e36SBarry Smith aij = (Mat_SeqAIJ *) (Mreuse)->data; 2055a8c6a408SBarry Smith if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 205600e6dbe6SBarry Smith ii = aij->i; 205700e6dbe6SBarry Smith jj = aij->j; 205800e6dbe6SBarry Smith aa = aij->a; 2059a0ff6018SBarry Smith for (i=0; i<m; i++) { 2060a0ff6018SBarry Smith row = rstart + i; 206100e6dbe6SBarry Smith nz = ii[i+1] - ii[i]; 206200e6dbe6SBarry Smith cwork = jj; jj += nz; 206300e6dbe6SBarry Smith vwork = aa; aa += nz; 20648c638d02SBarry Smith ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2065a0ff6018SBarry Smith } 2066a0ff6018SBarry Smith 2067a0ff6018SBarry Smith ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2068a0ff6018SBarry Smith ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2069a0ff6018SBarry Smith *newmat = M; 2070fee21e36SBarry Smith 2071fee21e36SBarry Smith /* save submatrix used in processor for next request */ 2072fee21e36SBarry Smith if (call == MAT_INITIAL_MATRIX) { 2073fee21e36SBarry Smith ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2074fee21e36SBarry Smith ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2075fee21e36SBarry Smith } 2076fee21e36SBarry Smith 2077a0ff6018SBarry Smith PetscFunctionReturn(0); 2078a0ff6018SBarry Smith } 2079