1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*ca161407SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.223 1997/10/29 14:07:25 bsmith Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 8d9942c19SSatish Balay #include "src/inline/spops.h" 98a729477SBarry Smith 109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 129e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 139e25ed09SBarry Smith length of colmap equals the global matrix length. 149e25ed09SBarry Smith */ 155615d1e5SSatish Balay #undef __FUNC__ 16d4bb536fSBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" 170a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 189e25ed09SBarry Smith { 1944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21905e6a2fSBarry Smith int n = B->n,i; 22dbb450caSBarry Smith 233a40ed3dSBarry Smith PetscFunctionBegin; 24758f045eSSatish Balay aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 25464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 26cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 27905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 283a40ed3dSBarry Smith PetscFunctionReturn(0); 299e25ed09SBarry Smith } 309e25ed09SBarry Smith 312493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 322493cbb0SBarry Smith 330520107fSSatish Balay #define CHUNKSIZE 15 3430770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 350520107fSSatish Balay { \ 360520107fSSatish Balay \ 370520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 3830770e4dSSatish Balay rmax = aimax[row]; nrow = ailen[row]; \ 39f5e9677aSSatish Balay col1 = col - shift; \ 40f5e9677aSSatish Balay \ 41ba4e3ef2SSatish Balay low = 0; high = nrow; \ 42ba4e3ef2SSatish Balay while (high-low > 5) { \ 43ba4e3ef2SSatish Balay t = (low+high)/2; \ 44ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 45ba4e3ef2SSatish Balay else low = t; \ 46ba4e3ef2SSatish Balay } \ 470520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 48f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 49f5e9677aSSatish Balay if (rp[_i] == col1) { \ 500520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 510520107fSSatish Balay else ap[_i] = value; \ 5230770e4dSSatish Balay goto a_noinsert; \ 530520107fSSatish Balay } \ 540520107fSSatish Balay } \ 5589280ab3SLois Curfman McInnes if (nonew == 1) goto a_noinsert; \ 5611ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 570520107fSSatish Balay if (nrow >= rmax) { \ 580520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 590520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 600520107fSSatish Balay Scalar *new_a; \ 610520107fSSatish Balay \ 6211ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 6389280ab3SLois Curfman McInnes \ 640520107fSSatish Balay /* malloc new storage space */ \ 650520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 660520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 670520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 680520107fSSatish Balay new_i = new_j + new_nz; \ 690520107fSSatish Balay \ 700520107fSSatish Balay /* copy over old data into new slots */ \ 710520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 720520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 730520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 740520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 750520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 760520107fSSatish Balay len*sizeof(int)); \ 770520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 780520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 790520107fSSatish Balay len*sizeof(Scalar)); \ 800520107fSSatish Balay /* free up old matrix storage */ \ 81f5e9677aSSatish Balay \ 820520107fSSatish Balay PetscFree(a->a); \ 830520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 840520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 850520107fSSatish Balay a->singlemalloc = 1; \ 860520107fSSatish Balay \ 870520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 8830770e4dSSatish Balay rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 890520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 900520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 910520107fSSatish Balay a->reallocs++; \ 920520107fSSatish Balay } \ 930520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 940520107fSSatish Balay /* shift up all the later entries in this row */ \ 950520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 960520107fSSatish Balay rp[ii+1] = rp[ii]; \ 970520107fSSatish Balay ap[ii+1] = ap[ii]; \ 980520107fSSatish Balay } \ 99f5e9677aSSatish Balay rp[_i] = col1; \ 1000520107fSSatish Balay ap[_i] = value; \ 10130770e4dSSatish Balay a_noinsert: ; \ 1020520107fSSatish Balay ailen[row] = nrow; \ 1030520107fSSatish Balay } 1040a198c4cSBarry Smith 10530770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 10630770e4dSSatish Balay { \ 10730770e4dSSatish Balay \ 10830770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 10930770e4dSSatish Balay rmax = bimax[row]; nrow = bilen[row]; \ 11030770e4dSSatish Balay col1 = col - shift; \ 11130770e4dSSatish Balay \ 112ba4e3ef2SSatish Balay low = 0; high = nrow; \ 113ba4e3ef2SSatish Balay while (high-low > 5) { \ 114ba4e3ef2SSatish Balay t = (low+high)/2; \ 115ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 116ba4e3ef2SSatish Balay else low = t; \ 117ba4e3ef2SSatish Balay } \ 11830770e4dSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 11930770e4dSSatish Balay if (rp[_i] > col1) break; \ 12030770e4dSSatish Balay if (rp[_i] == col1) { \ 12130770e4dSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 12230770e4dSSatish Balay else ap[_i] = value; \ 12330770e4dSSatish Balay goto b_noinsert; \ 12430770e4dSSatish Balay } \ 12530770e4dSSatish Balay } \ 12689280ab3SLois Curfman McInnes if (nonew == 1) goto b_noinsert; \ 12711ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 12830770e4dSSatish Balay if (nrow >= rmax) { \ 12930770e4dSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 13074c639caSSatish Balay int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 13130770e4dSSatish Balay Scalar *new_a; \ 13230770e4dSSatish Balay \ 13311ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 13489280ab3SLois Curfman McInnes \ 13530770e4dSSatish Balay /* malloc new storage space */ \ 13674c639caSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 13730770e4dSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 13830770e4dSSatish Balay new_j = (int *) (new_a + new_nz); \ 13930770e4dSSatish Balay new_i = new_j + new_nz; \ 14030770e4dSSatish Balay \ 14130770e4dSSatish Balay /* copy over old data into new slots */ \ 14230770e4dSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 14374c639caSSatish Balay for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 14430770e4dSSatish Balay PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 14530770e4dSSatish Balay len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 14630770e4dSSatish Balay PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 14730770e4dSSatish Balay len*sizeof(int)); \ 14830770e4dSSatish Balay PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 14930770e4dSSatish Balay PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 15030770e4dSSatish Balay len*sizeof(Scalar)); \ 15130770e4dSSatish Balay /* free up old matrix storage */ \ 15230770e4dSSatish Balay \ 15374c639caSSatish Balay PetscFree(b->a); \ 15474c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 15574c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 15674c639caSSatish Balay b->singlemalloc = 1; \ 15730770e4dSSatish Balay \ 15830770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 15930770e4dSSatish Balay rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 16074c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 16174c639caSSatish Balay b->maxnz += CHUNKSIZE; \ 16274c639caSSatish Balay b->reallocs++; \ 16330770e4dSSatish Balay } \ 16474c639caSSatish Balay N = nrow++ - 1; b->nz++; \ 16530770e4dSSatish Balay /* shift up all the later entries in this row */ \ 16630770e4dSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 16730770e4dSSatish Balay rp[ii+1] = rp[ii]; \ 16830770e4dSSatish Balay ap[ii+1] = ap[ii]; \ 16930770e4dSSatish Balay } \ 17030770e4dSSatish Balay rp[_i] = col1; \ 17130770e4dSSatish Balay ap[_i] = value; \ 17230770e4dSSatish Balay b_noinsert: ; \ 17330770e4dSSatish Balay bilen[row] = nrow; \ 17430770e4dSSatish Balay } 17530770e4dSSatish Balay 1760520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1775615d1e5SSatish Balay #undef __FUNC__ 1785615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 1798f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 1808a729477SBarry Smith { 18144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1824b0e389bSBarry Smith Scalar value; 1831eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 1841eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 185905e6a2fSBarry Smith int roworiented = aij->roworiented; 1868a729477SBarry Smith 1870520107fSSatish Balay /* Some Variables required in the macro */ 1884ee7247eSSatish Balay Mat A = aij->A; 1894ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 19030770e4dSSatish Balay int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 19130770e4dSSatish Balay Scalar *aa = a->a; 19230770e4dSSatish Balay 19330770e4dSSatish Balay Mat B = aij->B; 19430770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 19530770e4dSSatish Balay int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 19630770e4dSSatish Balay Scalar *ba = b->a; 19730770e4dSSatish Balay 198ba4e3ef2SSatish Balay int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 19930770e4dSSatish Balay int nonew = a->nonew,shift = a->indexshift; 20030770e4dSSatish Balay Scalar *ap; 2014ee7247eSSatish Balay 2023a40ed3dSBarry Smith PetscFunctionBegin; 2038a729477SBarry Smith for ( i=0; i<m; i++ ) { 2043a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 205e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 206e3372554SBarry Smith if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 2070a198c4cSBarry Smith #endif 2084b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 2094b0e389bSBarry Smith row = im[i] - rstart; 2101eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 2114b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 2124b0e389bSBarry Smith col = in[j] - cstart; 2134b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 21430770e4dSSatish Balay MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 2150520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2161eb62cbbSBarry Smith } 2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 218e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 219e3372554SBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 2200a198c4cSBarry Smith #endif 2211eb62cbbSBarry Smith else { 222227d817aSBarry Smith if (mat->was_assembled) { 223905e6a2fSBarry Smith if (!aij->colmap) { 224905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 225905e6a2fSBarry Smith } 226905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 227ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2282493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2294b0e389bSBarry Smith col = in[j]; 2309bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 231f9508a3cSSatish Balay B = aij->B; 232f9508a3cSSatish Balay b = (Mat_SeqAIJ *) B->data; 233f9508a3cSSatish Balay bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 234f9508a3cSSatish Balay ba = b->a; 235d6dfbf8fSBarry Smith } 2369e25ed09SBarry Smith } 2374b0e389bSBarry Smith else col = in[j]; 2384b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 23930770e4dSSatish Balay MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 24030770e4dSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2411eb62cbbSBarry Smith } 2421eb62cbbSBarry Smith } 2431eb62cbbSBarry Smith } 2441eb62cbbSBarry Smith else { 24590f02eecSBarry Smith if (roworiented && !aij->donotstash) { 2464b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 2474b0e389bSBarry Smith } 2484b0e389bSBarry Smith else { 24990f02eecSBarry Smith if (!aij->donotstash) { 2504b0e389bSBarry Smith row = im[i]; 2514b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 2524b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 2534b0e389bSBarry Smith } 2544b0e389bSBarry Smith } 2551eb62cbbSBarry Smith } 2568a729477SBarry Smith } 25790f02eecSBarry Smith } 2583a40ed3dSBarry Smith PetscFunctionReturn(0); 2598a729477SBarry Smith } 2608a729477SBarry Smith 2615615d1e5SSatish Balay #undef __FUNC__ 2625615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 2638f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264b49de8d1SLois Curfman McInnes { 265b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 268b49de8d1SLois Curfman McInnes 2693a40ed3dSBarry Smith PetscFunctionBegin; 270b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 271e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 272e3372554SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 273b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 274b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 275b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 276e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 277e3372554SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 278b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 279b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 280b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281b49de8d1SLois Curfman McInnes } 282b49de8d1SLois Curfman McInnes else { 283905e6a2fSBarry Smith if (!aij->colmap) { 284905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285905e6a2fSBarry Smith } 286905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 287e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288d9d09a02SSatish Balay else { 289b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290b49de8d1SLois Curfman McInnes } 291b49de8d1SLois Curfman McInnes } 292b49de8d1SLois Curfman McInnes } 293d9d09a02SSatish Balay } 294b49de8d1SLois Curfman McInnes else { 295e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 296b49de8d1SLois Curfman McInnes } 297b49de8d1SLois Curfman McInnes } 2983a40ed3dSBarry Smith PetscFunctionReturn(0); 299b49de8d1SLois Curfman McInnes } 300b49de8d1SLois Curfman McInnes 3015615d1e5SSatish Balay #undef __FUNC__ 3025615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 3038f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 3048a729477SBarry Smith { 30544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 30717699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 30817699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 3091eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3106abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 3111eb62cbbSBarry Smith InsertMode addv; 3121eb62cbbSBarry Smith Scalar *rvalues,*svalues; 3131eb62cbbSBarry Smith 3143a40ed3dSBarry Smith PetscFunctionBegin; 3151eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 316*ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 317dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 318e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 3191eb62cbbSBarry Smith } 32047794344SBarry Smith mat->insertmode = addv; /* in case this processor had no cache */ 3211eb62cbbSBarry Smith 3221eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3230452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 324cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3250452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 3261eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3271eb62cbbSBarry Smith idx = aij->stash.idx[i]; 32817699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3291eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3301eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 3318a729477SBarry Smith } 3328a729477SBarry Smith } 3338a729477SBarry Smith } 33417699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3351eb62cbbSBarry Smith 3361eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3370452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 338*ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 33917699dbbSLois Curfman McInnes nreceives = work[rank]; 340*ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 34117699dbbSLois Curfman McInnes nmax = work[rank]; 3420452661fSBarry Smith PetscFree(work); 3431eb62cbbSBarry Smith 3441eb62cbbSBarry Smith /* post receives: 3451eb62cbbSBarry Smith 1) each message will consist of ordered pairs 3461eb62cbbSBarry Smith (global index,value) we store the global index as a double 347d6dfbf8fSBarry Smith to simplify the message passing. 3481eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 3491eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 3501eb62cbbSBarry Smith this is a lot of wasted space. 3511eb62cbbSBarry Smith 3521eb62cbbSBarry Smith 3531eb62cbbSBarry Smith This could be done better. 3541eb62cbbSBarry Smith */ 355*ca161407SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 356*ca161407SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 3571eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 358*ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 359*ca161407SBarry Smith comm,recv_waits+i);CHKERRQ(ierr); 3601eb62cbbSBarry Smith } 3611eb62cbbSBarry Smith 3621eb62cbbSBarry Smith /* do sends: 3631eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3641eb62cbbSBarry Smith the ith processor 3651eb62cbbSBarry Smith */ 3660452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 367*ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 3680452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3691eb62cbbSBarry Smith starts[0] = 0; 37017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3711eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3721eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 3731eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 3741eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 3751eb62cbbSBarry Smith } 3760452661fSBarry Smith PetscFree(owner); 3771eb62cbbSBarry Smith starts[0] = 0; 37817699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3791eb62cbbSBarry Smith count = 0; 38017699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3811eb62cbbSBarry Smith if (procs[i]) { 382*ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 3831eb62cbbSBarry Smith } 3841eb62cbbSBarry Smith } 3850452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 3861eb62cbbSBarry Smith 3871eb62cbbSBarry Smith /* Free cache space */ 38855908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 38978b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 3901eb62cbbSBarry Smith 3911eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 3921eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 3931eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 3941eb62cbbSBarry Smith aij->rmax = nmax; 3951eb62cbbSBarry Smith 3963a40ed3dSBarry Smith PetscFunctionReturn(0); 3971eb62cbbSBarry Smith } 39844a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 3991eb62cbbSBarry Smith 4005615d1e5SSatish Balay #undef __FUNC__ 4015615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 4028f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 4031eb62cbbSBarry Smith { 40444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4051eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 406416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 407905e6a2fSBarry Smith int row,col,other_disassembled; 4081eb62cbbSBarry Smith Scalar *values,val; 40947794344SBarry Smith InsertMode addv = mat->insertmode; 4101eb62cbbSBarry Smith 4113a40ed3dSBarry Smith PetscFunctionBegin; 4121eb62cbbSBarry Smith /* wait on receives */ 4131eb62cbbSBarry Smith while (count) { 414*ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 4151eb62cbbSBarry Smith /* unpack receives into our local space */ 416d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 417*ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 4181eb62cbbSBarry Smith n = n/3; 4191eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 420227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 421227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 4221eb62cbbSBarry Smith val = values[3*i+2]; 4231eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 4241eb62cbbSBarry Smith col -= aij->cstart; 4256fd7127cSSatish Balay ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4263a40ed3dSBarry Smith } else { 42755a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 428905e6a2fSBarry Smith if (!aij->colmap) { 429905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 430905e6a2fSBarry Smith } 431905e6a2fSBarry Smith col = aij->colmap[col] - 1; 432ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4332493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 434227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 435d6dfbf8fSBarry Smith } 4369e25ed09SBarry Smith } 4376fd7127cSSatish Balay ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4381eb62cbbSBarry Smith } 4391eb62cbbSBarry Smith } 4401eb62cbbSBarry Smith count--; 4411eb62cbbSBarry Smith } 4420452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 4431eb62cbbSBarry Smith 4441eb62cbbSBarry Smith /* wait on sends */ 4451eb62cbbSBarry Smith if (aij->nsends) { 4460a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 447*ca161407SBarry Smith ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 4480452661fSBarry Smith PetscFree(send_status); 4491eb62cbbSBarry Smith } 4500452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 4511eb62cbbSBarry Smith 45278b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 45378b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 4541eb62cbbSBarry Smith 4552493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 4562493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 457*ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 458227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 4592493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 4602493cbb0SBarry Smith } 4612493cbb0SBarry Smith 4626d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 46378b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 4645e42470aSBarry Smith } 46578b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 46678b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 4675e42470aSBarry Smith 4687a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 4693a40ed3dSBarry Smith PetscFunctionReturn(0); 4708a729477SBarry Smith } 4718a729477SBarry Smith 4725615d1e5SSatish Balay #undef __FUNC__ 4735615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 4748f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A) 4751eb62cbbSBarry Smith { 47644a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 477dbd7a890SLois Curfman McInnes int ierr; 4783a40ed3dSBarry Smith 4793a40ed3dSBarry Smith PetscFunctionBegin; 48078b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 48178b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 4823a40ed3dSBarry Smith PetscFunctionReturn(0); 4831eb62cbbSBarry Smith } 4841eb62cbbSBarry Smith 4851eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 4861eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 4871eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 4881eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 4891eb62cbbSBarry Smith routine. 4901eb62cbbSBarry Smith */ 4915615d1e5SSatish Balay #undef __FUNC__ 4925615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 4938f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 4941eb62cbbSBarry Smith { 49544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 49617699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 4976abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 49817699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 4995392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 500d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 501d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 5021eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 5031eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 5041eb62cbbSBarry Smith IS istmp; 5051eb62cbbSBarry Smith 5063a40ed3dSBarry Smith PetscFunctionBegin; 50777c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 50878b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 5091eb62cbbSBarry Smith 5101eb62cbbSBarry Smith /* first count number of contributors to each processor */ 5110452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 512cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 5130452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 5141eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5151eb62cbbSBarry Smith idx = rows[i]; 5161eb62cbbSBarry Smith found = 0; 51717699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 5181eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 5191eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 5201eb62cbbSBarry Smith } 5211eb62cbbSBarry Smith } 522e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 5231eb62cbbSBarry Smith } 52417699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 5251eb62cbbSBarry Smith 5261eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 5270452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 528*ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 52917699dbbSLois Curfman McInnes nrecvs = work[rank]; 530*ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 53117699dbbSLois Curfman McInnes nmax = work[rank]; 5320452661fSBarry Smith PetscFree(work); 5331eb62cbbSBarry Smith 5341eb62cbbSBarry Smith /* post receives: */ 5353a40ed3dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 536*ca161407SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 5371eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 538*ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 5391eb62cbbSBarry Smith } 5401eb62cbbSBarry Smith 5411eb62cbbSBarry Smith /* do sends: 5421eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 5431eb62cbbSBarry Smith the ith processor 5441eb62cbbSBarry Smith */ 5450452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 5463a40ed3dSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 5470452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 5481eb62cbbSBarry Smith starts[0] = 0; 54917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5501eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5511eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 5521eb62cbbSBarry Smith } 5531eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 5541eb62cbbSBarry Smith 5551eb62cbbSBarry Smith starts[0] = 0; 55617699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5571eb62cbbSBarry Smith count = 0; 55817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 5591eb62cbbSBarry Smith if (procs[i]) { 560*ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 5611eb62cbbSBarry Smith } 5621eb62cbbSBarry Smith } 5630452661fSBarry Smith PetscFree(starts); 5641eb62cbbSBarry Smith 56517699dbbSLois Curfman McInnes base = owners[rank]; 5661eb62cbbSBarry Smith 5671eb62cbbSBarry Smith /* wait on receives */ 5680452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 5691eb62cbbSBarry Smith source = lens + nrecvs; 5701eb62cbbSBarry Smith count = nrecvs; slen = 0; 5711eb62cbbSBarry Smith while (count) { 572*ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 5731eb62cbbSBarry Smith /* unpack receives into our local space */ 574*ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 575d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 576d6dfbf8fSBarry Smith lens[imdex] = n; 5771eb62cbbSBarry Smith slen += n; 5781eb62cbbSBarry Smith count--; 5791eb62cbbSBarry Smith } 5800452661fSBarry Smith PetscFree(recv_waits); 5811eb62cbbSBarry Smith 5821eb62cbbSBarry Smith /* move the data into the send scatter */ 5830452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 5841eb62cbbSBarry Smith count = 0; 5851eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 5861eb62cbbSBarry Smith values = rvalues + i*nmax; 5871eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 5881eb62cbbSBarry Smith lrows[count++] = values[j] - base; 5891eb62cbbSBarry Smith } 5901eb62cbbSBarry Smith } 5910452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 5920452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 5931eb62cbbSBarry Smith 5941eb62cbbSBarry Smith /* actually zap the local rows */ 595029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 596464493b3SBarry Smith PLogObjectParent(A,istmp); 5970452661fSBarry Smith PetscFree(lrows); 59878b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 59978b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 60078b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 6011eb62cbbSBarry Smith 6021eb62cbbSBarry Smith /* wait on sends */ 6031eb62cbbSBarry Smith if (nsends) { 604*ca161407SBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 605*ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 6060452661fSBarry Smith PetscFree(send_status); 6071eb62cbbSBarry Smith } 6080452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 6091eb62cbbSBarry Smith 6103a40ed3dSBarry Smith PetscFunctionReturn(0); 6111eb62cbbSBarry Smith } 6121eb62cbbSBarry Smith 6135615d1e5SSatish Balay #undef __FUNC__ 6145615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 6158f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 6161eb62cbbSBarry Smith { 617416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 618fbd6ef76SBarry Smith int ierr,nt; 619416022c9SBarry Smith 6203a40ed3dSBarry Smith PetscFunctionBegin; 621a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 622fbd6ef76SBarry Smith if (nt != a->n) { 623f40265daSLois Curfman McInnes SETERRQ(1,0,"Incompatible partition of A and xx"); 624fbd6ef76SBarry Smith } 62543a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 62638597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 62743a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 62838597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 6293a40ed3dSBarry Smith PetscFunctionReturn(0); 6301eb62cbbSBarry Smith } 6311eb62cbbSBarry Smith 6325615d1e5SSatish Balay #undef __FUNC__ 6335615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 6348f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 635da3a660dSBarry Smith { 636416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 637da3a660dSBarry Smith int ierr; 6383a40ed3dSBarry Smith 6393a40ed3dSBarry Smith PetscFunctionBegin; 64043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 64138597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 64243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 64338597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 6443a40ed3dSBarry Smith PetscFunctionReturn(0); 645da3a660dSBarry Smith } 646da3a660dSBarry Smith 6475615d1e5SSatish Balay #undef __FUNC__ 6485615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 6498f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 650da3a660dSBarry Smith { 651416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 652da3a660dSBarry Smith int ierr; 653da3a660dSBarry Smith 6543a40ed3dSBarry Smith PetscFunctionBegin; 655da3a660dSBarry Smith /* do nondiagonal part */ 65638597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 657da3a660dSBarry Smith /* send it on its way */ 658537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 659da3a660dSBarry Smith /* do local part */ 66038597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 661da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 662da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 663da3a660dSBarry Smith /* but is not perhaps always true. */ 664537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 6653a40ed3dSBarry Smith PetscFunctionReturn(0); 666da3a660dSBarry Smith } 667da3a660dSBarry Smith 6685615d1e5SSatish Balay #undef __FUNC__ 6695615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 6708f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 671da3a660dSBarry Smith { 672416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 673da3a660dSBarry Smith int ierr; 674da3a660dSBarry Smith 6753a40ed3dSBarry Smith PetscFunctionBegin; 676da3a660dSBarry Smith /* do nondiagonal part */ 67738597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 678da3a660dSBarry Smith /* send it on its way */ 679537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 680da3a660dSBarry Smith /* do local part */ 68138597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 682da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 683da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 684da3a660dSBarry Smith /* but is not perhaps always true. */ 6850a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 6863a40ed3dSBarry Smith PetscFunctionReturn(0); 687da3a660dSBarry Smith } 688da3a660dSBarry Smith 6891eb62cbbSBarry Smith /* 6901eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 6911eb62cbbSBarry Smith diagonal block 6921eb62cbbSBarry Smith */ 6935615d1e5SSatish Balay #undef __FUNC__ 6945615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 6958f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 6961eb62cbbSBarry Smith { 6973a40ed3dSBarry Smith int ierr; 698416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 6993a40ed3dSBarry Smith 7003a40ed3dSBarry Smith PetscFunctionBegin; 7013a40ed3dSBarry Smith if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 7025baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 7033a40ed3dSBarry Smith SETERRQ(1,0,"row partition must equal col partition"); 7043a40ed3dSBarry Smith } 7053a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 7063a40ed3dSBarry Smith PetscFunctionReturn(0); 7071eb62cbbSBarry Smith } 7081eb62cbbSBarry Smith 7095615d1e5SSatish Balay #undef __FUNC__ 7105615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 7118f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A) 712052efed2SBarry Smith { 713052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 714052efed2SBarry Smith int ierr; 7153a40ed3dSBarry Smith 7163a40ed3dSBarry Smith PetscFunctionBegin; 717052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 718052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 7193a40ed3dSBarry Smith PetscFunctionReturn(0); 720052efed2SBarry Smith } 721052efed2SBarry Smith 7225615d1e5SSatish Balay #undef __FUNC__ 723d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" 7248f6be9afSLois Curfman McInnes int MatDestroy_MPIAIJ(PetscObject obj) 7251eb62cbbSBarry Smith { 7261eb62cbbSBarry Smith Mat mat = (Mat) obj; 72744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 7281eb62cbbSBarry Smith int ierr; 72983e2fdc7SBarry Smith 7303a40ed3dSBarry Smith PetscFunctionBegin; 7313a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 7326e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 733a5a9c739SBarry Smith #endif 73483e2fdc7SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 7350452661fSBarry Smith PetscFree(aij->rowners); 73678b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 73778b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 7380452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 7390452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 7401eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 741a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 7427a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 7430452661fSBarry Smith PetscFree(aij); 744a5a9c739SBarry Smith PLogObjectDestroy(mat); 7450452661fSBarry Smith PetscHeaderDestroy(mat); 7463a40ed3dSBarry Smith PetscFunctionReturn(0); 7471eb62cbbSBarry Smith } 748ee50ffe9SBarry Smith 7495615d1e5SSatish Balay #undef __FUNC__ 750d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" 7518f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 7521eb62cbbSBarry Smith { 753416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 754416022c9SBarry Smith int ierr; 755416022c9SBarry Smith 7563a40ed3dSBarry Smith PetscFunctionBegin; 75717699dbbSLois Curfman McInnes if (aij->size == 1) { 758416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 759416022c9SBarry Smith } 760e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 7613a40ed3dSBarry Smith PetscFunctionReturn(0); 762416022c9SBarry Smith } 763416022c9SBarry Smith 7645615d1e5SSatish Balay #undef __FUNC__ 765d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 7668f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 767416022c9SBarry Smith { 76844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 769dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 770a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 771d636dbe3SBarry Smith FILE *fd; 77219bcc07fSBarry Smith ViewerType vtype; 773416022c9SBarry Smith 7743a40ed3dSBarry Smith PetscFunctionBegin; 77519bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 77619bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 77790ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 7780a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7794e220ebcSLois Curfman McInnes MatInfo info; 7804e220ebcSLois Curfman McInnes int flg; 781a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 78290ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7834e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 78495e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 78577c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 78695e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 7874e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 78895e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 7894e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 7904e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 7914e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 7924e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 7934e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 794a56f8943SBarry Smith fflush(fd); 79577c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 796a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 7973a40ed3dSBarry Smith PetscFunctionReturn(0); 7983a40ed3dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 7993a40ed3dSBarry Smith PetscFunctionReturn(0); 80008480c60SBarry Smith } 801416022c9SBarry Smith } 802416022c9SBarry Smith 80319bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 80419bcc07fSBarry Smith Draw draw; 80519bcc07fSBarry Smith PetscTruth isnull; 80619bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 8073a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 80819bcc07fSBarry Smith } 80919bcc07fSBarry Smith 81019bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 81190ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 81277c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 813d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 81417699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 8151eb62cbbSBarry Smith aij->cend); 81678b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 81778b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 818d13ab20cSBarry Smith fflush(fd); 81977c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 8203a40ed3dSBarry Smith } else { 821a56f8943SBarry Smith int size = aij->size; 822a56f8943SBarry Smith rank = aij->rank; 82317699dbbSLois Curfman McInnes if (size == 1) { 82478b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 8253a40ed3dSBarry Smith } else { 82695373324SBarry Smith /* assemble the entire matrix onto first processor. */ 82795373324SBarry Smith Mat A; 828ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 8292eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 83095373324SBarry Smith Scalar *a; 8312ee70a88SLois Curfman McInnes 83217699dbbSLois Curfman McInnes if (!rank) { 833b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 834c451ab25SLois Curfman McInnes CHKERRQ(ierr); 8353a40ed3dSBarry Smith } else { 836b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 837c451ab25SLois Curfman McInnes CHKERRQ(ierr); 83895373324SBarry Smith } 839464493b3SBarry Smith PLogObjectParent(mat,A); 840416022c9SBarry Smith 84195373324SBarry Smith /* copy over the A part */ 842ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 8432ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 84495373324SBarry Smith row = aij->rstart; 845dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 84695373324SBarry Smith for ( i=0; i<m; i++ ) { 847416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 84895373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 84995373324SBarry Smith } 8502ee70a88SLois Curfman McInnes aj = Aloc->j; 851dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 85295373324SBarry Smith 85395373324SBarry Smith /* copy over the B part */ 854ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 8552ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 85695373324SBarry Smith row = aij->rstart; 8570452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 858dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 85995373324SBarry Smith for ( i=0; i<m; i++ ) { 860416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 86195373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 86295373324SBarry Smith } 8630452661fSBarry Smith PetscFree(ct); 8646d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8656d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 86617699dbbSLois Curfman McInnes if (!rank) { 86778b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 86895373324SBarry Smith } 86978b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 87095373324SBarry Smith } 87195373324SBarry Smith } 8723a40ed3dSBarry Smith PetscFunctionReturn(0); 8731eb62cbbSBarry Smith } 8741eb62cbbSBarry Smith 8755615d1e5SSatish Balay #undef __FUNC__ 876d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ" 8778f6be9afSLois Curfman McInnes int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 878416022c9SBarry Smith { 879416022c9SBarry Smith Mat mat = (Mat) obj; 880416022c9SBarry Smith int ierr; 88119bcc07fSBarry Smith ViewerType vtype; 882416022c9SBarry Smith 8833a40ed3dSBarry Smith PetscFunctionBegin; 88419bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 88519bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 88619bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 887d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 888416022c9SBarry Smith } 88919bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 8903a40ed3dSBarry Smith ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 891416022c9SBarry Smith } 8923a40ed3dSBarry Smith PetscFunctionReturn(0); 893416022c9SBarry Smith } 894416022c9SBarry Smith 8951eb62cbbSBarry Smith /* 8961eb62cbbSBarry Smith This has to provide several versions. 8971eb62cbbSBarry Smith 8981eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 8991eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 900d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 9011eb62cbbSBarry Smith */ 9025615d1e5SSatish Balay #undef __FUNC__ 9035615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 9048f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 905dbb450caSBarry Smith double fshift,int its,Vec xx) 9068a729477SBarry Smith { 90744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 908d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 909ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 910c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 9116abc6512SBarry Smith int ierr,*idx, *diag; 912416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 9138a729477SBarry Smith 9143a40ed3dSBarry Smith PetscFunctionBegin; 915d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 916dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 917dbb450caSBarry Smith ls = ls + shift; 91883e2fdc7SBarry Smith if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 919d6dfbf8fSBarry Smith diag = A->diag; 920c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 921da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 9223a40ed3dSBarry Smith ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 9233a40ed3dSBarry Smith PetscFunctionReturn(0); 924da3a660dSBarry Smith } 9253a40ed3dSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 9263a40ed3dSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 927d6dfbf8fSBarry Smith while (its--) { 928d6dfbf8fSBarry Smith /* go down through the rows */ 929d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 930d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 931dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 932dbb450caSBarry Smith v = A->a + A->i[i] + shift; 933d6dfbf8fSBarry Smith sum = b[i]; 934d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 935dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 936d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 937dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 938dbb450caSBarry Smith v = B->a + B->i[i] + shift; 939d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 94055a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 941d6dfbf8fSBarry Smith } 942d6dfbf8fSBarry Smith /* come up through the rows */ 943d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 944d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 945dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 946dbb450caSBarry Smith v = A->a + A->i[i] + shift; 947d6dfbf8fSBarry Smith sum = b[i]; 948d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 949dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 950d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 951dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 952dbb450caSBarry Smith v = B->a + B->i[i] + shift; 953d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 95455a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 955d6dfbf8fSBarry Smith } 956d6dfbf8fSBarry Smith } 9573a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 958da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 9593a40ed3dSBarry Smith ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 9603a40ed3dSBarry Smith PetscFunctionReturn(0); 961da3a660dSBarry Smith } 9623a40ed3dSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 9633a40ed3dSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 964d6dfbf8fSBarry Smith while (its--) { 965d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 966d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 967dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 968dbb450caSBarry Smith v = A->a + A->i[i] + shift; 969d6dfbf8fSBarry Smith sum = b[i]; 970d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 971dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 972d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 973dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 974dbb450caSBarry Smith v = B->a + B->i[i] + shift; 975d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 97655a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 977d6dfbf8fSBarry Smith } 978d6dfbf8fSBarry Smith } 9793a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 980da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 9813a40ed3dSBarry Smith ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 9823a40ed3dSBarry Smith PetscFunctionReturn(0); 983da3a660dSBarry Smith } 98443a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 98578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 98643a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 98778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 988d6dfbf8fSBarry Smith while (its--) { 989d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 990d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 991dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 992dbb450caSBarry Smith v = A->a + A->i[i] + shift; 993d6dfbf8fSBarry Smith sum = b[i]; 994d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 995dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 996d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 997dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 998dbb450caSBarry Smith v = B->a + B->i[i] + shift; 999d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 100055a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1001d6dfbf8fSBarry Smith } 1002d6dfbf8fSBarry Smith } 10033a40ed3dSBarry Smith } else { 1004e3372554SBarry Smith SETERRQ(1,0,"Option not supported"); 1005c16cb8f2SBarry Smith } 10063a40ed3dSBarry Smith PetscFunctionReturn(0); 10078a729477SBarry Smith } 1008a66be287SLois Curfman McInnes 10095615d1e5SSatish Balay #undef __FUNC__ 1010d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" 10118f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1012a66be287SLois Curfman McInnes { 1013a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1014a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 10154e220ebcSLois Curfman McInnes int ierr; 10164e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 1017a66be287SLois Curfman McInnes 10183a40ed3dSBarry Smith PetscFunctionBegin; 10194e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 10204e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 10214e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 10224e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 10234e220ebcSLois Curfman McInnes info->block_size = 1.0; 10244e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 10254e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 10264e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 10274e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 10284e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 10294e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 1030a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 10314e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 10324e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 10334e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 10344e220ebcSLois Curfman McInnes info->memory = isend[3]; 10354e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1036a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1037*ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 10384e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10394e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10404e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10414e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10424e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1043a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1044*ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 10454e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10464e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10474e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10484e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10494e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1050a66be287SLois Curfman McInnes } 10514e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 10524e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 10534e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 10544e220ebcSLois Curfman McInnes 10553a40ed3dSBarry Smith PetscFunctionReturn(0); 1056a66be287SLois Curfman McInnes } 1057a66be287SLois Curfman McInnes 10585615d1e5SSatish Balay #undef __FUNC__ 1059d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" 10608f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op) 1061c74985f6SBarry Smith { 1062c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1063c74985f6SBarry Smith 10643a40ed3dSBarry Smith PetscFunctionBegin; 10656d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10666d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10676da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1068c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 106996854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1070c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1071b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1072b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1073b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1074aeafbbfcSLois Curfman McInnes a->roworiented = 1; 1075c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1076c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1077b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 10786da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 10796d4a8577SBarry Smith op == MAT_SYMMETRIC || 10806d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 10816d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 108294a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10836d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 10844b0e389bSBarry Smith a->roworiented = 0; 10854b0e389bSBarry Smith MatSetOption(a->A,op); 10864b0e389bSBarry Smith MatSetOption(a->B,op); 10872b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 108890f02eecSBarry Smith a->donotstash = 1; 10893a40ed3dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS){ 10903a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 10913a40ed3dSBarry Smith } else { 10923a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 10933a40ed3dSBarry Smith } 10943a40ed3dSBarry Smith PetscFunctionReturn(0); 1095c74985f6SBarry Smith } 1096c74985f6SBarry Smith 10975615d1e5SSatish Balay #undef __FUNC__ 1098d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" 10998f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1100c74985f6SBarry Smith { 110144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 11023a40ed3dSBarry Smith 11033a40ed3dSBarry Smith PetscFunctionBegin; 11040752156aSBarry Smith if (m) *m = mat->M; 11050752156aSBarry Smith if (n) *n = mat->N; 11063a40ed3dSBarry Smith PetscFunctionReturn(0); 1107c74985f6SBarry Smith } 1108c74985f6SBarry Smith 11095615d1e5SSatish Balay #undef __FUNC__ 1110d4bb536fSBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" 11118f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1112c74985f6SBarry Smith { 111344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 11143a40ed3dSBarry Smith 11153a40ed3dSBarry Smith PetscFunctionBegin; 11160752156aSBarry Smith if (m) *m = mat->m; 11170752156aSBarry Smith if (n) *n = mat->N; 11183a40ed3dSBarry Smith PetscFunctionReturn(0); 1119c74985f6SBarry Smith } 1120c74985f6SBarry Smith 11215615d1e5SSatish Balay #undef __FUNC__ 1122d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 11238f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1124c74985f6SBarry Smith { 112544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 11263a40ed3dSBarry Smith 11273a40ed3dSBarry Smith PetscFunctionBegin; 1128c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 11293a40ed3dSBarry Smith PetscFunctionReturn(0); 1130c74985f6SBarry Smith } 1131c74985f6SBarry Smith 11326d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11336d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11346d84be18SBarry Smith 11355615d1e5SSatish Balay #undef __FUNC__ 11365615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 11376d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 113839e00950SLois Curfman McInnes { 1139154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 114070f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1141154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1142154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 114370f0671dSBarry Smith int *cmap, *idx_p; 114439e00950SLois Curfman McInnes 11453a40ed3dSBarry Smith PetscFunctionBegin; 1146e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 11477a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 11487a0afa10SBarry Smith 114970f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 11507a0afa10SBarry Smith /* 11517a0afa10SBarry Smith allocate enough space to hold information from the longest row. 11527a0afa10SBarry Smith */ 11537a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1154c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1155c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 11567a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 11577a0afa10SBarry Smith if (max < tmp) { max = tmp; } 11587a0afa10SBarry Smith } 11597a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 11607a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 11617a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 11627a0afa10SBarry Smith } 11637a0afa10SBarry Smith 1164e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1165abc0e9e4SLois Curfman McInnes lrow = row - rstart; 116639e00950SLois Curfman McInnes 1167154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1168154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1169154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 117038597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 117138597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1172154123eaSLois Curfman McInnes nztot = nzA + nzB; 1173154123eaSLois Curfman McInnes 117470f0671dSBarry Smith cmap = mat->garray; 1175154123eaSLois Curfman McInnes if (v || idx) { 1176154123eaSLois Curfman McInnes if (nztot) { 1177154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 117870f0671dSBarry Smith int imark = -1; 1179154123eaSLois Curfman McInnes if (v) { 118070f0671dSBarry Smith *v = v_p = mat->rowvalues; 118139e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 118270f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1183154123eaSLois Curfman McInnes else break; 1184154123eaSLois Curfman McInnes } 1185154123eaSLois Curfman McInnes imark = i; 118670f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 118770f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1188154123eaSLois Curfman McInnes } 1189154123eaSLois Curfman McInnes if (idx) { 119070f0671dSBarry Smith *idx = idx_p = mat->rowindices; 119170f0671dSBarry Smith if (imark > -1) { 119270f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 119370f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 119470f0671dSBarry Smith } 119570f0671dSBarry Smith } else { 1196154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 119770f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1198154123eaSLois Curfman McInnes else break; 1199154123eaSLois Curfman McInnes } 1200154123eaSLois Curfman McInnes imark = i; 120170f0671dSBarry Smith } 120270f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 120370f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 120439e00950SLois Curfman McInnes } 120539e00950SLois Curfman McInnes } 12061ca473b0SSatish Balay else { 12071ca473b0SSatish Balay if (idx) *idx = 0; 12081ca473b0SSatish Balay if (v) *v = 0; 12091ca473b0SSatish Balay } 1210154123eaSLois Curfman McInnes } 121139e00950SLois Curfman McInnes *nz = nztot; 121238597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 121338597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 12143a40ed3dSBarry Smith PetscFunctionReturn(0); 121539e00950SLois Curfman McInnes } 121639e00950SLois Curfman McInnes 12175615d1e5SSatish Balay #undef __FUNC__ 1218d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" 12196d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 122039e00950SLois Curfman McInnes { 12217a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 12223a40ed3dSBarry Smith 12233a40ed3dSBarry Smith PetscFunctionBegin; 12247a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1225e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 12267a0afa10SBarry Smith } 12277a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 12283a40ed3dSBarry Smith PetscFunctionReturn(0); 122939e00950SLois Curfman McInnes } 123039e00950SLois Curfman McInnes 12315615d1e5SSatish Balay #undef __FUNC__ 12325615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 12338f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1234855ac2c5SLois Curfman McInnes { 1235855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1236ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1237416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1238416022c9SBarry Smith double sum = 0.0; 123904ca555eSLois Curfman McInnes Scalar *v; 124004ca555eSLois Curfman McInnes 12413a40ed3dSBarry Smith PetscFunctionBegin; 124217699dbbSLois Curfman McInnes if (aij->size == 1) { 124314183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 124437fa93a5SLois Curfman McInnes } else { 124504ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 124604ca555eSLois Curfman McInnes v = amat->a; 124704ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 12483a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 124904ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 125004ca555eSLois Curfman McInnes #else 125104ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 125204ca555eSLois Curfman McInnes #endif 125304ca555eSLois Curfman McInnes } 125404ca555eSLois Curfman McInnes v = bmat->a; 125504ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 12563a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 125704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 125804ca555eSLois Curfman McInnes #else 125904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 126004ca555eSLois Curfman McInnes #endif 126104ca555eSLois Curfman McInnes } 1262*ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 126304ca555eSLois Curfman McInnes *norm = sqrt(*norm); 12643a40ed3dSBarry Smith } else if (type == NORM_1) { /* max column norm */ 126504ca555eSLois Curfman McInnes double *tmp, *tmp2; 126604ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 1267758f045eSSatish Balay tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1268758f045eSSatish Balay tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1269cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 127004ca555eSLois Curfman McInnes *norm = 0.0; 127104ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 127204ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1273579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 127404ca555eSLois Curfman McInnes } 127504ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 127604ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1277579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 127804ca555eSLois Curfman McInnes } 1279*ca161407SBarry Smith ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 128004ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 128104ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 128204ca555eSLois Curfman McInnes } 12830452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 12843a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 1285515d9167SLois Curfman McInnes double ntemp = 0.0; 128604ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1287dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 128804ca555eSLois Curfman McInnes sum = 0.0; 128904ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1290cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 129104ca555eSLois Curfman McInnes } 1292dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 129304ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1294cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 129504ca555eSLois Curfman McInnes } 1296515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 129704ca555eSLois Curfman McInnes } 1298*ca161407SBarry Smith ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1299*ca161407SBarry Smith } else { 1300e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 130104ca555eSLois Curfman McInnes } 130237fa93a5SLois Curfman McInnes } 13033a40ed3dSBarry Smith PetscFunctionReturn(0); 1304855ac2c5SLois Curfman McInnes } 1305855ac2c5SLois Curfman McInnes 13065615d1e5SSatish Balay #undef __FUNC__ 13075615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 13088f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1309b7c46309SBarry Smith { 1310b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1311dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1312416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1313b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 13143a40ed3dSBarry Smith Mat B; 1315b7c46309SBarry Smith Scalar *array; 1316b7c46309SBarry Smith 13173a40ed3dSBarry Smith PetscFunctionBegin; 1318d4bb536fSBarry Smith if (matout == PETSC_NULL && M != N) { 1319e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 1320d4bb536fSBarry Smith } 1321d4bb536fSBarry Smith 1322d4bb536fSBarry Smith ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1323b7c46309SBarry Smith 1324b7c46309SBarry Smith /* copy over the A part */ 1325ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1326b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1327b7c46309SBarry Smith row = a->rstart; 1328dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1329b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1330416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1331b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1332b7c46309SBarry Smith } 1333b7c46309SBarry Smith aj = Aloc->j; 13344af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1335b7c46309SBarry Smith 1336b7c46309SBarry Smith /* copy over the B part */ 1337ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1338b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1339b7c46309SBarry Smith row = a->rstart; 13400452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1341dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1342b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1343416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1344b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1345b7c46309SBarry Smith } 13460452661fSBarry Smith PetscFree(ct); 13476d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13486d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13493638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 13500de55854SLois Curfman McInnes *matout = B; 13510de55854SLois Curfman McInnes } else { 13520de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 13530452661fSBarry Smith PetscFree(a->rowners); 13540de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 13550de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 13560452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 13570452661fSBarry Smith if (a->garray) PetscFree(a->garray); 13580de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1359a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 13600452661fSBarry Smith PetscFree(a); 1361f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 13620452661fSBarry Smith PetscHeaderDestroy(B); 13630de55854SLois Curfman McInnes } 13643a40ed3dSBarry Smith PetscFunctionReturn(0); 1365b7c46309SBarry Smith } 1366b7c46309SBarry Smith 13675615d1e5SSatish Balay #undef __FUNC__ 13685615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 13694b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1370a008b906SSatish Balay { 13714b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13724b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1373a008b906SSatish Balay int ierr,s1,s2,s3; 1374a008b906SSatish Balay 13753a40ed3dSBarry Smith PetscFunctionBegin; 13764b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 13774b967eb1SSatish Balay if (rr) { 13784b967eb1SSatish Balay s3 = aij->n; 13794b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 1380e3372554SBarry Smith if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 13814b967eb1SSatish Balay /* Overlap communication with computation. */ 138243a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1383a008b906SSatish Balay } 13844b967eb1SSatish Balay if (ll) { 13854b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 1386e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1387c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 13884b967eb1SSatish Balay } 13894b967eb1SSatish Balay /* scale the diagonal block */ 1390c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 13914b967eb1SSatish Balay 13924b967eb1SSatish Balay if (rr) { 13934b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 139443a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1395c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 13964b967eb1SSatish Balay } 13974b967eb1SSatish Balay 13983a40ed3dSBarry Smith PetscFunctionReturn(0); 1399a008b906SSatish Balay } 1400a008b906SSatish Balay 1401a008b906SSatish Balay 1402682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 14035615d1e5SSatish Balay #undef __FUNC__ 1404d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" 14058f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A) 1406682d7d0cSBarry Smith { 1407682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 14083a40ed3dSBarry Smith int ierr; 1409682d7d0cSBarry Smith 14103a40ed3dSBarry Smith PetscFunctionBegin; 14113a40ed3dSBarry Smith if (!a->rank) { 14123a40ed3dSBarry Smith ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 14133a40ed3dSBarry Smith } 14143a40ed3dSBarry Smith PetscFunctionReturn(0); 1415682d7d0cSBarry Smith } 1416682d7d0cSBarry Smith 14175615d1e5SSatish Balay #undef __FUNC__ 1418d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" 14198f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 14205a838052SSatish Balay { 14213a40ed3dSBarry Smith PetscFunctionBegin; 14225a838052SSatish Balay *bs = 1; 14233a40ed3dSBarry Smith PetscFunctionReturn(0); 14245a838052SSatish Balay } 14255615d1e5SSatish Balay #undef __FUNC__ 1426d4bb536fSBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" 14278f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A) 1428bb5a7306SBarry Smith { 1429bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1430bb5a7306SBarry Smith int ierr; 14313a40ed3dSBarry Smith 14323a40ed3dSBarry Smith PetscFunctionBegin; 1433bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 14343a40ed3dSBarry Smith PetscFunctionReturn(0); 1435bb5a7306SBarry Smith } 1436bb5a7306SBarry Smith 1437d4bb536fSBarry Smith #undef __FUNC__ 1438d4bb536fSBarry Smith #define __FUNC__ "MatEqual_MPIAIJ" 1439d4bb536fSBarry Smith int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1440d4bb536fSBarry Smith { 1441d4bb536fSBarry Smith Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1442d4bb536fSBarry Smith Mat a, b, c, d; 1443d4bb536fSBarry Smith PetscTruth flg; 1444d4bb536fSBarry Smith int ierr; 1445d4bb536fSBarry Smith 14463a40ed3dSBarry Smith PetscFunctionBegin; 1447d4bb536fSBarry Smith if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type"); 1448d4bb536fSBarry Smith a = matA->A; b = matA->B; 1449d4bb536fSBarry Smith c = matB->A; d = matB->B; 1450d4bb536fSBarry Smith 1451d4bb536fSBarry Smith ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1452d4bb536fSBarry Smith if (flg == PETSC_TRUE) { 1453d4bb536fSBarry Smith ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1454d4bb536fSBarry Smith } 1455*ca161407SBarry Smith ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 14563a40ed3dSBarry Smith PetscFunctionReturn(0); 1457d4bb536fSBarry Smith } 1458d4bb536fSBarry Smith 14598f6be9afSLois Curfman McInnes extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 14602f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 14610a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 14620a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 146300e6dbe6SBarry Smith extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,MatGetSubMatrixCall,Mat *); 146400e6dbe6SBarry Smith 14658a729477SBarry Smith /* -------------------------------------------------------------------*/ 14662ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 146739e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 146844a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 146944a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 147036ce4990SBarry Smith 0,0, 147136ce4990SBarry Smith 0,0, 147236ce4990SBarry Smith 0,0, 147344a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1474b7c46309SBarry Smith MatTranspose_MPIAIJ, 1475d4bb536fSBarry Smith MatGetInfo_MPIAIJ,MatEqual_MPIAIJ, 1476a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1477ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 14781eb62cbbSBarry Smith 0, 1479299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 148036ce4990SBarry Smith 0,0,0,0, 1481d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 148236ce4990SBarry Smith 0,0, 148394a9d846SBarry Smith 0,0,MatConvertSameType_MPIAIJ,0,0, 1484b49de8d1SLois Curfman McInnes 0,0,0, 1485598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1486052efed2SBarry Smith MatPrintHelp_MPIAIJ, 14873b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 14880a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 148900e6dbe6SBarry Smith MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ, 1490*ca161407SBarry Smith 0,0,MatGetSubMatrix_MPIAIJ}; 149136ce4990SBarry Smith 14928a729477SBarry Smith 14935615d1e5SSatish Balay #undef __FUNC__ 14945615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 14951987afe7SBarry Smith /*@C 1496ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 14973a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 14983a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 14993a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 15003a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 15018a729477SBarry Smith 15028a729477SBarry Smith Input Parameters: 15031eb62cbbSBarry Smith . comm - MPI communicator 15047d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 150592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 150692e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 15071a3896d6SBarry Smith . n - This value should be the same as the local size used in creating the 15081a3896d6SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 15091a3896d6SBarry Smith calculated if N is given) For square matrices n is almost always m. 15107d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 151192e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1512ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1513ff756334SLois Curfman McInnes (same for all local rows) 15142bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 151592e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 15162bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 15172bd5e0b2SLois Curfman McInnes it is zero. 15182bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1519ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 15202bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 15212bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 15222bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 15238a729477SBarry Smith 1524ff756334SLois Curfman McInnes Output Parameter: 152544cd7ae7SLois Curfman McInnes . A - the matrix 15268a729477SBarry Smith 1527ff756334SLois Curfman McInnes Notes: 1528ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1529ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 15300002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 15310002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1532ff756334SLois Curfman McInnes 1533ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1534ff756334SLois Curfman McInnes (possibly both). 1535ff756334SLois Curfman McInnes 15365511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 15375511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 15385511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 15395511cfe3SLois Curfman McInnes 15405511cfe3SLois Curfman McInnes Options Database Keys: 15415511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 15426e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 15436e62573dSLois Curfman McInnes $ (max limit=5) 15446e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 15456e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 15466e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 15475511cfe3SLois Curfman McInnes 1548e0245417SLois Curfman McInnes Storage Information: 1549e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1550e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1551e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1552e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1553e0245417SLois Curfman McInnes 1554e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 15555ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 15565ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 15575ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 15585ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1559ff756334SLois Curfman McInnes 15605511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 15615511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 15622191d07cSBarry Smith 1563b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1564b810aeb4SBarry Smith $ ------------------- 15655511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 15665511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 15675511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 15685511cfe3SLois Curfman McInnes $ ------------------- 1569b810aeb4SBarry Smith $ 1570b810aeb4SBarry Smith 15715511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 15725511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 15735511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 15745511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 15755511cfe3SLois Curfman McInnes 15765511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 15775511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 15785511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 15793d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 158092e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15816da5968aSLois Curfman McInnes matrices. 15823a511b96SLois Curfman McInnes 1583dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1584ff756334SLois Curfman McInnes 1585fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 15868a729477SBarry Smith @*/ 15871eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 158844cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 15898a729477SBarry Smith { 159044cd7ae7SLois Curfman McInnes Mat B; 159144cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 159236ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1593416022c9SBarry Smith 15943a40ed3dSBarry Smith PetscFunctionBegin; 15953914022bSBarry Smith MPI_Comm_size(comm,&size); 15963914022bSBarry Smith if (size == 1) { 15973914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 15983914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 15993914022bSBarry Smith ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 16003a40ed3dSBarry Smith PetscFunctionReturn(0); 16013914022bSBarry Smith } 16023914022bSBarry Smith 160344cd7ae7SLois Curfman McInnes *A = 0; 1604d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 160544cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 160644cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 160744cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 160844cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 160944cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 161044cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 161144cd7ae7SLois Curfman McInnes B->factor = 0; 161244cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 161390f02eecSBarry Smith B->mapping = 0; 1614d6dfbf8fSBarry Smith 161547794344SBarry Smith B->insertmode = NOT_SET_VALUES; 16169eb4d147SSatish Balay b->size = size; 161744cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 16181eb62cbbSBarry Smith 16193a40ed3dSBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1620e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 16213a40ed3dSBarry Smith } 16221987afe7SBarry Smith 1623dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 16241eb62cbbSBarry Smith work[0] = m; work[1] = n; 1625*ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1626dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1627dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 16281eb62cbbSBarry Smith } 162944cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 163044cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 163144cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 163244cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 163344cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 163444cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 16351eb62cbbSBarry Smith 16361eb62cbbSBarry Smith /* build local table of row and column ownerships */ 163744cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1638f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1639603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 1640*ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 164144cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 164244cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 164344cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 16448a729477SBarry Smith } 164544cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 164644cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 1647*ca161407SBarry Smith ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 164844cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 164944cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 165044cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 16518a729477SBarry Smith } 165244cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 165344cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 16548a729477SBarry Smith 16555ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1656029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 165744cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 16587b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1659029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 166044cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 16618a729477SBarry Smith 16621eb62cbbSBarry Smith /* build cache for off array entries formed */ 166344cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 166490f02eecSBarry Smith b->donotstash = 0; 166544cd7ae7SLois Curfman McInnes b->colmap = 0; 166644cd7ae7SLois Curfman McInnes b->garray = 0; 166744cd7ae7SLois Curfman McInnes b->roworiented = 1; 16688a729477SBarry Smith 16691eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 167044cd7ae7SLois Curfman McInnes b->lvec = 0; 167144cd7ae7SLois Curfman McInnes b->Mvctx = 0; 16728a729477SBarry Smith 16737a0afa10SBarry Smith /* stuff for MatGetRow() */ 167444cd7ae7SLois Curfman McInnes b->rowindices = 0; 167544cd7ae7SLois Curfman McInnes b->rowvalues = 0; 167644cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 16777a0afa10SBarry Smith 167844cd7ae7SLois Curfman McInnes *A = B; 16793a40ed3dSBarry Smith PetscFunctionReturn(0); 1680d6dfbf8fSBarry Smith } 1681c74985f6SBarry Smith 16825615d1e5SSatish Balay #undef __FUNC__ 16835615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ" 16848f6be9afSLois Curfman McInnes int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1685d6dfbf8fSBarry Smith { 1686d6dfbf8fSBarry Smith Mat mat; 1687416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1688a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1689d6dfbf8fSBarry Smith 16903a40ed3dSBarry Smith PetscFunctionBegin; 1691416022c9SBarry Smith *newmat = 0; 1692d4bb536fSBarry Smith PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1693a5a9c739SBarry Smith PLogObjectCreate(mat); 16940452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1695416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 169644a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 169744a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1698d6dfbf8fSBarry Smith mat->factor = matin->factor; 1699c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1700d6dfbf8fSBarry Smith 170144cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 170244cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 170344cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 170444cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1705d6dfbf8fSBarry Smith 1706416022c9SBarry Smith a->rstart = oldmat->rstart; 1707416022c9SBarry Smith a->rend = oldmat->rend; 1708416022c9SBarry Smith a->cstart = oldmat->cstart; 1709416022c9SBarry Smith a->cend = oldmat->cend; 171017699dbbSLois Curfman McInnes a->size = oldmat->size; 171117699dbbSLois Curfman McInnes a->rank = oldmat->rank; 171247794344SBarry Smith mat->insertmode = NOT_SET_VALUES; 1713bcd2baecSBarry Smith a->rowvalues = 0; 1714bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1715d6dfbf8fSBarry Smith 1716603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1717f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1718603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1719603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1720416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 17212ee70a88SLois Curfman McInnes if (oldmat->colmap) { 17220452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1723416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1724416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1725416022c9SBarry Smith } else a->colmap = 0; 17263f41c07dSBarry Smith if (oldmat->garray) { 17273f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 17283f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1729464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 17303f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1731416022c9SBarry Smith } else a->garray = 0; 1732d6dfbf8fSBarry Smith 1733416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1734416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1735a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1736416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1737416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1738416022c9SBarry Smith PLogObjectParent(mat,a->A); 1739416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1740416022c9SBarry Smith PLogObjectParent(mat,a->B); 17415dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 174225cdf11fSBarry Smith if (flg) { 1743682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1744682d7d0cSBarry Smith } 17458a729477SBarry Smith *newmat = mat; 17463a40ed3dSBarry Smith PetscFunctionReturn(0); 17478a729477SBarry Smith } 1748416022c9SBarry Smith 174977c4ece6SBarry Smith #include "sys.h" 1750416022c9SBarry Smith 17515615d1e5SSatish Balay #undef __FUNC__ 17525615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 175319bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1754416022c9SBarry Smith { 1755d65a2f8fSBarry Smith Mat A; 1756d65a2f8fSBarry Smith Scalar *vals,*svals; 175719bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1758416022c9SBarry Smith MPI_Status status; 17593a40ed3dSBarry Smith int i, nz, ierr, j,rstart, rend, fd; 176017699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1761d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 176219bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1763416022c9SBarry Smith 17643a40ed3dSBarry Smith PetscFunctionBegin; 176517699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 176617699dbbSLois Curfman McInnes if (!rank) { 176790ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 17680752156aSBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1769e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1770d64ed03dSBarry Smith if (header[3] < 0) { 1771d64ed03dSBarry Smith SETERRQ(1,1,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 1772d64ed03dSBarry Smith } 17736c5fab8fSBarry Smith } 17746c5fab8fSBarry Smith 1775d64ed03dSBarry Smith 1776*ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1777416022c9SBarry Smith M = header[1]; N = header[2]; 1778416022c9SBarry Smith /* determine ownership of all rows */ 177917699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 17800452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1781*ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1782416022c9SBarry Smith rowners[0] = 0; 178317699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1784416022c9SBarry Smith rowners[i] += rowners[i-1]; 1785416022c9SBarry Smith } 178617699dbbSLois Curfman McInnes rstart = rowners[rank]; 178717699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1788416022c9SBarry Smith 1789416022c9SBarry Smith /* distribute row lengths to all processors */ 17900452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1791416022c9SBarry Smith offlens = ourlens + (rend-rstart); 179217699dbbSLois Curfman McInnes if (!rank) { 17930452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 17940752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 17950452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 179617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1797*ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 17980452661fSBarry Smith PetscFree(sndcounts); 17993a40ed3dSBarry Smith } else { 1800*ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1801416022c9SBarry Smith } 1802416022c9SBarry Smith 180317699dbbSLois Curfman McInnes if (!rank) { 1804416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 18050452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1806cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 180717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1808416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1809416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1810416022c9SBarry Smith } 1811416022c9SBarry Smith } 18120452661fSBarry Smith PetscFree(rowlengths); 1813416022c9SBarry Smith 1814416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1815416022c9SBarry Smith maxnz = 0; 181617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 18170452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1818416022c9SBarry Smith } 18190452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1820416022c9SBarry Smith 1821416022c9SBarry Smith /* read in my part of the matrix column indices */ 1822416022c9SBarry Smith nz = procsnz[0]; 18230452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 18240752156aSBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1825d65a2f8fSBarry Smith 1826d65a2f8fSBarry Smith /* read in every one elses and ship off */ 182717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1828d65a2f8fSBarry Smith nz = procsnz[i]; 18290752156aSBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1830*ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1831d65a2f8fSBarry Smith } 18320452661fSBarry Smith PetscFree(cols); 18333a40ed3dSBarry Smith } else { 1834416022c9SBarry Smith /* determine buffer space needed for message */ 1835416022c9SBarry Smith nz = 0; 1836416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1837416022c9SBarry Smith nz += ourlens[i]; 1838416022c9SBarry Smith } 18390452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1840416022c9SBarry Smith 1841416022c9SBarry Smith /* receive message of column indices*/ 1842*ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1843*ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1844e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1845416022c9SBarry Smith } 1846416022c9SBarry Smith 1847416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1848cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1849416022c9SBarry Smith jj = 0; 1850416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1851416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1852d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1853416022c9SBarry Smith jj++; 1854416022c9SBarry Smith } 1855416022c9SBarry Smith } 1856d65a2f8fSBarry Smith 1857d65a2f8fSBarry Smith /* create our matrix */ 1858416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1859416022c9SBarry Smith ourlens[i] -= offlens[i]; 1860416022c9SBarry Smith } 1861d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1862d65a2f8fSBarry Smith A = *newmat; 18636d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1864d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1865d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1866d65a2f8fSBarry Smith } 1867416022c9SBarry Smith 186817699dbbSLois Curfman McInnes if (!rank) { 18690452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1870416022c9SBarry Smith 1871416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1872416022c9SBarry Smith nz = procsnz[0]; 18730752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1874d65a2f8fSBarry Smith 1875d65a2f8fSBarry Smith /* insert into matrix */ 1876d65a2f8fSBarry Smith jj = rstart; 1877d65a2f8fSBarry Smith smycols = mycols; 1878d65a2f8fSBarry Smith svals = vals; 1879d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1880d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1881d65a2f8fSBarry Smith smycols += ourlens[i]; 1882d65a2f8fSBarry Smith svals += ourlens[i]; 1883d65a2f8fSBarry Smith jj++; 1884416022c9SBarry Smith } 1885416022c9SBarry Smith 1886d65a2f8fSBarry Smith /* read in other processors and ship out */ 188717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1888416022c9SBarry Smith nz = procsnz[i]; 18890752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1890*ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1891416022c9SBarry Smith } 18920452661fSBarry Smith PetscFree(procsnz); 18933a40ed3dSBarry Smith } else { 1894d65a2f8fSBarry Smith /* receive numeric values */ 18950452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1896416022c9SBarry Smith 1897d65a2f8fSBarry Smith /* receive message of values*/ 1898*ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1899*ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1900e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1901d65a2f8fSBarry Smith 1902d65a2f8fSBarry Smith /* insert into matrix */ 1903d65a2f8fSBarry Smith jj = rstart; 1904d65a2f8fSBarry Smith smycols = mycols; 1905d65a2f8fSBarry Smith svals = vals; 1906d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1907d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1908d65a2f8fSBarry Smith smycols += ourlens[i]; 1909d65a2f8fSBarry Smith svals += ourlens[i]; 1910d65a2f8fSBarry Smith jj++; 1911d65a2f8fSBarry Smith } 1912d65a2f8fSBarry Smith } 19130452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1914d65a2f8fSBarry Smith 19156d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 19166d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 19173a40ed3dSBarry Smith PetscFunctionReturn(0); 1918416022c9SBarry Smith } 1919a0ff6018SBarry Smith 192029da9460SBarry Smith #undef __FUNC__ 192129da9460SBarry Smith #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 1922a0ff6018SBarry Smith /* 192329da9460SBarry Smith Not great since it makes two copies of the submatrix, first an SeqAIJ 192429da9460SBarry Smith in local and then by concatenating the local matrices the end result. 192529da9460SBarry Smith Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1926a0ff6018SBarry Smith */ 1927a0ff6018SBarry Smith int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatGetSubMatrixCall call,Mat *newmat) 1928a0ff6018SBarry Smith { 192900e6dbe6SBarry Smith int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1930a0ff6018SBarry Smith Mat *local,M; 193100e6dbe6SBarry Smith Scalar *vwork,*aa; 193200e6dbe6SBarry Smith MPI_Comm comm = mat->comm; 193300e6dbe6SBarry Smith Mat_SeqAIJ *aij; 193400e6dbe6SBarry Smith int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 1935a0ff6018SBarry Smith 1936a0ff6018SBarry Smith PetscFunctionBegin; 193700e6dbe6SBarry Smith MPI_Comm_rank(comm,&rank); 193800e6dbe6SBarry Smith MPI_Comm_size(comm,&size); 193900e6dbe6SBarry Smith 1940a0ff6018SBarry Smith ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1941a0ff6018SBarry Smith 1942a0ff6018SBarry Smith /* 1943a0ff6018SBarry Smith m - number of local rows 1944a0ff6018SBarry Smith n - number of columns (same on all processors) 1945a0ff6018SBarry Smith rstart - first row in new global matrix generated 1946a0ff6018SBarry Smith */ 1947a0ff6018SBarry Smith ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr); 1948a0ff6018SBarry Smith if (call == MAT_INITIAL_MATRIX) { 194900e6dbe6SBarry Smith aij = (Mat_SeqAIJ *) (*local)->data; 195000e6dbe6SBarry Smith if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix"); 195100e6dbe6SBarry Smith ii = aij->i; 195200e6dbe6SBarry Smith jj = aij->j; 195300e6dbe6SBarry Smith 1954a0ff6018SBarry Smith /* 195500e6dbe6SBarry Smith Determine the number of non-zeros in the diagonal and off-diagonal 195600e6dbe6SBarry Smith portions of the matrix in order to do correct preallocation 1957a0ff6018SBarry Smith */ 195800e6dbe6SBarry Smith 195900e6dbe6SBarry Smith /* first get start and end of "diagonal" columns */ 196000e6dbe6SBarry Smith nlocal = n/size + ((n % size) > rank); 1961*ca161407SBarry Smith ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 196200e6dbe6SBarry Smith rstart = rend - nlocal; 196300e6dbe6SBarry Smith 196400e6dbe6SBarry Smith /* next, compute all the lengths */ 196500e6dbe6SBarry Smith dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 196600e6dbe6SBarry Smith olens = dlens + m; 196700e6dbe6SBarry Smith for ( i=0; i<m; i++ ) { 196800e6dbe6SBarry Smith jend = ii[i+1] - ii[i]; 196900e6dbe6SBarry Smith olen = 0; 197000e6dbe6SBarry Smith dlen = 0; 197100e6dbe6SBarry Smith for ( j=0; j<jend; j++ ) { 197200e6dbe6SBarry Smith if ( *jj < rstart || *jj >= rend) olen++; 197300e6dbe6SBarry Smith else dlen++; 197400e6dbe6SBarry Smith jj++; 197500e6dbe6SBarry Smith } 197600e6dbe6SBarry Smith olens[i] = olen; 197700e6dbe6SBarry Smith dlens[i] = dlen; 197800e6dbe6SBarry Smith } 197900e6dbe6SBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 198000e6dbe6SBarry Smith PetscFree(dlens); 1981a0ff6018SBarry Smith } else { 1982a0ff6018SBarry Smith int ml,nl; 1983a0ff6018SBarry Smith 1984a0ff6018SBarry Smith M = *newmat; 1985a0ff6018SBarry Smith ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1986a0ff6018SBarry Smith if (ml != m) SETERRQ(1,1,"Previous matrix must be same size/layout as request"); 1987a0ff6018SBarry Smith ierr = MatZeroEntries(M);CHKERRQ(ierr); 1988a0ff6018SBarry Smith } 1989a0ff6018SBarry Smith ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 199000e6dbe6SBarry Smith aij = (Mat_SeqAIJ *) (*local)->data; 199100e6dbe6SBarry Smith if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix"); 199200e6dbe6SBarry Smith ii = aij->i; 199300e6dbe6SBarry Smith jj = aij->j; 199400e6dbe6SBarry Smith aa = aij->a; 1995a0ff6018SBarry Smith for (i=0; i<m; i++) { 1996a0ff6018SBarry Smith row = rstart + i; 199700e6dbe6SBarry Smith nz = ii[i+1] - ii[i]; 199800e6dbe6SBarry Smith cwork = jj; jj += nz; 199900e6dbe6SBarry Smith vwork = aa; aa += nz; 20008c638d02SBarry Smith ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2001a0ff6018SBarry Smith } 2002a0ff6018SBarry Smith 2003a0ff6018SBarry Smith ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr); 2004a0ff6018SBarry Smith ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2005a0ff6018SBarry Smith ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2006a0ff6018SBarry Smith *newmat = M; 2007a0ff6018SBarry Smith PetscFunctionReturn(0); 2008a0ff6018SBarry Smith } 2009