1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*bc5ccf88SSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.285 1999/02/26 17:08:13 balay Exp balay $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 7d9942c19SSatish Balay #include "src/inline/spops.h" 88a729477SBarry Smith 9*bc5ccf88SSatish Balay extern int MatSetUpMultiply_MPIAIJ(Mat); 10*bc5ccf88SSatish Balay extern int DisAssemble_MPIAIJ(Mat); 11*bc5ccf88SSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12*bc5ccf88SSatish Balay extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13*bc5ccf88SSatish Balay extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14*bc5ccf88SSatish Balay extern int MatPrintHelp_SeqAIJ(Mat); 15*bc5ccf88SSatish Balay 169e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 179e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 189e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 199e25ed09SBarry Smith length of colmap equals the global matrix length. 209e25ed09SBarry Smith */ 215615d1e5SSatish Balay #undef __FUNC__ 22d4bb536fSBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" 230a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 249e25ed09SBarry Smith { 2544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 276dee3f7eSBarry Smith int n = B->n,i; 286dee3f7eSBarry Smith #if defined (USE_CTABLE) 296dee3f7eSBarry Smith int ierr; 306dee3f7eSBarry Smith #endif 31dbb450caSBarry Smith 323a40ed3dSBarry Smith PetscFunctionBegin; 33b1fc9764SSatish Balay #if defined (USE_CTABLE) 34fa46199cSSatish Balay ierr = TableCreate(aij->n/5,&aij->colmap); CHKERRQ(ierr); 35b1fc9764SSatish Balay for ( i=0; i<n; i++ ){ 36b1fc9764SSatish Balay ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 37b1fc9764SSatish Balay } 38b1fc9764SSatish Balay #else 39758f045eSSatish Balay aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 40464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 41cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 42905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 43b1fc9764SSatish Balay #endif 443a40ed3dSBarry Smith PetscFunctionReturn(0); 459e25ed09SBarry Smith } 469e25ed09SBarry Smith 470520107fSSatish Balay #define CHUNKSIZE 15 4830770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 490520107fSSatish Balay { \ 500520107fSSatish Balay \ 510520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 5230770e4dSSatish Balay rmax = aimax[row]; nrow = ailen[row]; \ 53f5e9677aSSatish Balay col1 = col - shift; \ 54f5e9677aSSatish Balay \ 55ba4e3ef2SSatish Balay low = 0; high = nrow; \ 56ba4e3ef2SSatish Balay while (high-low > 5) { \ 57ba4e3ef2SSatish Balay t = (low+high)/2; \ 58ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 59ba4e3ef2SSatish Balay else low = t; \ 60ba4e3ef2SSatish Balay } \ 610520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 62f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 63f5e9677aSSatish Balay if (rp[_i] == col1) { \ 640520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 650520107fSSatish Balay else ap[_i] = value; \ 6630770e4dSSatish Balay goto a_noinsert; \ 670520107fSSatish Balay } \ 680520107fSSatish Balay } \ 6989280ab3SLois Curfman McInnes if (nonew == 1) goto a_noinsert; \ 70a8c6a408SBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 710520107fSSatish Balay if (nrow >= rmax) { \ 720520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 730520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 740520107fSSatish Balay Scalar *new_a; \ 750520107fSSatish Balay \ 76a8c6a408SBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 7789280ab3SLois Curfman McInnes \ 780520107fSSatish Balay /* malloc new storage space */ \ 790520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 800520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 810520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 820520107fSSatish Balay new_i = new_j + new_nz; \ 830520107fSSatish Balay \ 840520107fSSatish Balay /* copy over old data into new slots */ \ 850520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 860520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 870520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 880520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 890520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 900520107fSSatish Balay len*sizeof(int)); \ 910520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 920520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 930520107fSSatish Balay len*sizeof(Scalar)); \ 940520107fSSatish Balay /* free up old matrix storage */ \ 95f5e9677aSSatish Balay \ 960520107fSSatish Balay PetscFree(a->a); \ 970520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 980520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 990520107fSSatish Balay a->singlemalloc = 1; \ 1000520107fSSatish Balay \ 1010520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 10230770e4dSSatish Balay rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 1030520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 1040520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 1050520107fSSatish Balay a->reallocs++; \ 1060520107fSSatish Balay } \ 1070520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 1080520107fSSatish Balay /* shift up all the later entries in this row */ \ 1090520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 1100520107fSSatish Balay rp[ii+1] = rp[ii]; \ 1110520107fSSatish Balay ap[ii+1] = ap[ii]; \ 1120520107fSSatish Balay } \ 113f5e9677aSSatish Balay rp[_i] = col1; \ 1140520107fSSatish Balay ap[_i] = value; \ 11530770e4dSSatish Balay a_noinsert: ; \ 1160520107fSSatish Balay ailen[row] = nrow; \ 1170520107fSSatish Balay } 1180a198c4cSBarry Smith 11930770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 12030770e4dSSatish Balay { \ 12130770e4dSSatish Balay \ 12230770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 12330770e4dSSatish Balay rmax = bimax[row]; nrow = bilen[row]; \ 12430770e4dSSatish Balay col1 = col - shift; \ 12530770e4dSSatish Balay \ 126ba4e3ef2SSatish Balay low = 0; high = nrow; \ 127ba4e3ef2SSatish Balay while (high-low > 5) { \ 128ba4e3ef2SSatish Balay t = (low+high)/2; \ 129ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 130ba4e3ef2SSatish Balay else low = t; \ 131ba4e3ef2SSatish Balay } \ 13230770e4dSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 13330770e4dSSatish Balay if (rp[_i] > col1) break; \ 13430770e4dSSatish Balay if (rp[_i] == col1) { \ 13530770e4dSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 13630770e4dSSatish Balay else ap[_i] = value; \ 13730770e4dSSatish Balay goto b_noinsert; \ 13830770e4dSSatish Balay } \ 13930770e4dSSatish Balay } \ 14089280ab3SLois Curfman McInnes if (nonew == 1) goto b_noinsert; \ 141a8c6a408SBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 14230770e4dSSatish Balay if (nrow >= rmax) { \ 14330770e4dSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 14474c639caSSatish Balay int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 14530770e4dSSatish Balay Scalar *new_a; \ 14630770e4dSSatish Balay \ 147a8c6a408SBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 14889280ab3SLois Curfman McInnes \ 14930770e4dSSatish Balay /* malloc new storage space */ \ 15074c639caSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 15130770e4dSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 15230770e4dSSatish Balay new_j = (int *) (new_a + new_nz); \ 15330770e4dSSatish Balay new_i = new_j + new_nz; \ 15430770e4dSSatish Balay \ 15530770e4dSSatish Balay /* copy over old data into new slots */ \ 15630770e4dSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 15774c639caSSatish Balay for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 15830770e4dSSatish Balay PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 15930770e4dSSatish Balay len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 16030770e4dSSatish Balay PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 16130770e4dSSatish Balay len*sizeof(int)); \ 16230770e4dSSatish Balay PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 16330770e4dSSatish Balay PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 16430770e4dSSatish Balay len*sizeof(Scalar)); \ 16530770e4dSSatish Balay /* free up old matrix storage */ \ 16630770e4dSSatish Balay \ 16774c639caSSatish Balay PetscFree(b->a); \ 16874c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 16974c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 17074c639caSSatish Balay b->singlemalloc = 1; \ 17130770e4dSSatish Balay \ 17230770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 17330770e4dSSatish Balay rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 17474c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 17574c639caSSatish Balay b->maxnz += CHUNKSIZE; \ 17674c639caSSatish Balay b->reallocs++; \ 17730770e4dSSatish Balay } \ 17874c639caSSatish Balay N = nrow++ - 1; b->nz++; \ 17930770e4dSSatish Balay /* shift up all the later entries in this row */ \ 18030770e4dSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 18130770e4dSSatish Balay rp[ii+1] = rp[ii]; \ 18230770e4dSSatish Balay ap[ii+1] = ap[ii]; \ 18330770e4dSSatish Balay } \ 18430770e4dSSatish Balay rp[_i] = col1; \ 18530770e4dSSatish Balay ap[_i] = value; \ 18630770e4dSSatish Balay b_noinsert: ; \ 18730770e4dSSatish Balay bilen[row] = nrow; \ 18830770e4dSSatish Balay } 18930770e4dSSatish Balay 1905615d1e5SSatish Balay #undef __FUNC__ 1915615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 1928f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 1938a729477SBarry Smith { 19444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1954b0e389bSBarry Smith Scalar value; 1961eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 1971eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 198905e6a2fSBarry Smith int roworiented = aij->roworiented; 1998a729477SBarry Smith 2000520107fSSatish Balay /* Some Variables required in the macro */ 2014ee7247eSSatish Balay Mat A = aij->A; 2024ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 20330770e4dSSatish Balay int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 20430770e4dSSatish Balay Scalar *aa = a->a; 20530770e4dSSatish Balay 20630770e4dSSatish Balay Mat B = aij->B; 20730770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 20830770e4dSSatish Balay int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 20930770e4dSSatish Balay Scalar *ba = b->a; 21030770e4dSSatish Balay 211ba4e3ef2SSatish Balay int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 21230770e4dSSatish Balay int nonew = a->nonew,shift = a->indexshift; 21330770e4dSSatish Balay Scalar *ap; 2144ee7247eSSatish Balay 2153a40ed3dSBarry Smith PetscFunctionBegin; 2168a729477SBarry Smith for ( i=0; i<m; i++ ) { 2175ef9f2a5SBarry Smith if (im[i] < 0) continue; 2183a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 219a8c6a408SBarry Smith if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 2200a198c4cSBarry Smith #endif 2214b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 2224b0e389bSBarry Smith row = im[i] - rstart; 2231eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 2244b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 2254b0e389bSBarry Smith col = in[j] - cstart; 2264b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 22730770e4dSSatish Balay MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 2280520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2291eb62cbbSBarry Smith } 2305ef9f2a5SBarry Smith else if (in[j] < 0) continue; 2313a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 232a8c6a408SBarry Smith else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 2330a198c4cSBarry Smith #endif 2341eb62cbbSBarry Smith else { 235227d817aSBarry Smith if (mat->was_assembled) { 236905e6a2fSBarry Smith if (!aij->colmap) { 237905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 238905e6a2fSBarry Smith } 239b1fc9764SSatish Balay #if defined (USE_CTABLE) 240fa46199cSSatish Balay ierr = TableFind(aij->colmap,in[j]+1,&col); CHKERRQ(ierr); 241fa46199cSSatish Balay col--; 242b1fc9764SSatish Balay #else 243905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 244b1fc9764SSatish Balay #endif 245ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2462493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2474b0e389bSBarry Smith col = in[j]; 2489bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 249f9508a3cSSatish Balay B = aij->B; 250f9508a3cSSatish Balay b = (Mat_SeqAIJ *) B->data; 251f9508a3cSSatish Balay bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 252f9508a3cSSatish Balay ba = b->a; 253d6dfbf8fSBarry Smith } 254c48de900SBarry Smith } else col = in[j]; 2554b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 25630770e4dSSatish Balay MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 25730770e4dSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2581eb62cbbSBarry Smith } 2591eb62cbbSBarry Smith } 2605ef9f2a5SBarry Smith } else { 26190f02eecSBarry Smith if (roworiented && !aij->donotstash) { 2624b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 2635ef9f2a5SBarry Smith } else { 26490f02eecSBarry Smith if (!aij->donotstash) { 2654b0e389bSBarry Smith row = im[i]; 2664b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 2674b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 2684b0e389bSBarry Smith } 2694b0e389bSBarry Smith } 2701eb62cbbSBarry Smith } 2718a729477SBarry Smith } 27290f02eecSBarry Smith } 2733a40ed3dSBarry Smith PetscFunctionReturn(0); 2748a729477SBarry Smith } 2758a729477SBarry Smith 2765615d1e5SSatish Balay #undef __FUNC__ 2775615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 2788f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 279b49de8d1SLois Curfman McInnes { 280b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 281b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 282b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 283b49de8d1SLois Curfman McInnes 2843a40ed3dSBarry Smith PetscFunctionBegin; 285b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 286a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 287a8c6a408SBarry Smith if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 288b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 289b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 290b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 291a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 292a8c6a408SBarry Smith if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 293b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 294b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 295b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 296fa852ad4SSatish Balay } else { 297905e6a2fSBarry Smith if (!aij->colmap) { 298905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 299905e6a2fSBarry Smith } 300b1fc9764SSatish Balay #if defined (USE_CTABLE) 301fa46199cSSatish Balay ierr = TableFind(aij->colmap,idxn[j]+1,&col); CHKERRQ(ierr); 302fa46199cSSatish Balay col --; 303b1fc9764SSatish Balay #else 304905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 305b1fc9764SSatish Balay #endif 306e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 307d9d09a02SSatish Balay else { 308b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 309b49de8d1SLois Curfman McInnes } 310b49de8d1SLois Curfman McInnes } 311b49de8d1SLois Curfman McInnes } 312a8c6a408SBarry Smith } else { 313a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 314b49de8d1SLois Curfman McInnes } 315b49de8d1SLois Curfman McInnes } 3163a40ed3dSBarry Smith PetscFunctionReturn(0); 317b49de8d1SLois Curfman McInnes } 318*bc5ccf88SSatish Balay #if defined (__JUNK__) 319b49de8d1SLois Curfman McInnes 3205615d1e5SSatish Balay #undef __FUNC__ 3215615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 3228f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 3238a729477SBarry Smith { 32444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 325d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 32617699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 32717699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 3281eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3296abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 3301eb62cbbSBarry Smith InsertMode addv; 3311eb62cbbSBarry Smith Scalar *rvalues,*svalues; 3321eb62cbbSBarry Smith 3333a40ed3dSBarry Smith PetscFunctionBegin; 334b5bd3ad5SBarry Smith if (aij->donotstash) { 335b5bd3ad5SBarry Smith aij->svalues = 0; aij->rvalues = 0; 336b5bd3ad5SBarry Smith aij->nsends = 0; aij->nrecvs = 0; 337b5bd3ad5SBarry Smith aij->send_waits = 0; aij->recv_waits = 0; 338b5bd3ad5SBarry Smith aij->rmax = 0; 339b5bd3ad5SBarry Smith PetscFunctionReturn(0); 340b5bd3ad5SBarry Smith } 341b5bd3ad5SBarry Smith 3421eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 343ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 344dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 345a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 3461eb62cbbSBarry Smith } 34747794344SBarry Smith mat->insertmode = addv; /* in case this processor had no cache */ 3481eb62cbbSBarry Smith 3491eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3500452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 351cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3520452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 3531eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3541eb62cbbSBarry Smith idx = aij->stash.idx[i]; 35517699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3561eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3571eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 3588a729477SBarry Smith } 3598a729477SBarry Smith } 3608a729477SBarry Smith } 36117699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3621eb62cbbSBarry Smith 3631eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3640452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 365ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 36617699dbbSLois Curfman McInnes nreceives = work[rank]; 367ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 36817699dbbSLois Curfman McInnes nmax = work[rank]; 3690452661fSBarry Smith PetscFree(work); 3701eb62cbbSBarry Smith 3711eb62cbbSBarry Smith /* post receives: 3721eb62cbbSBarry Smith 1) each message will consist of ordered pairs 3731eb62cbbSBarry Smith (global index,value) we store the global index as a double 374d6dfbf8fSBarry Smith to simplify the message passing. 3751eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 3761eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 3771eb62cbbSBarry Smith this is a lot of wasted space. 3781eb62cbbSBarry Smith 3791eb62cbbSBarry Smith 3801eb62cbbSBarry Smith This could be done better. 3811eb62cbbSBarry Smith */ 382ca161407SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 383ca161407SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 3841eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 385ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 386ca161407SBarry Smith comm,recv_waits+i);CHKERRQ(ierr); 3871eb62cbbSBarry Smith } 3881eb62cbbSBarry Smith 3891eb62cbbSBarry Smith /* do sends: 3901eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3911eb62cbbSBarry Smith the ith processor 3921eb62cbbSBarry Smith */ 3930452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 394ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 3950452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3961eb62cbbSBarry Smith starts[0] = 0; 39717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3981eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3991eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 4001eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 4011eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 4021eb62cbbSBarry Smith } 4030452661fSBarry Smith PetscFree(owner); 4041eb62cbbSBarry Smith starts[0] = 0; 40517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4061eb62cbbSBarry Smith count = 0; 40717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 4081eb62cbbSBarry Smith if (procs[i]) { 409ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 4101eb62cbbSBarry Smith } 4111eb62cbbSBarry Smith } 412b5bd3ad5SBarry Smith PetscFree(starts); 413b5bd3ad5SBarry Smith PetscFree(nprocs); 4141eb62cbbSBarry Smith 4151eb62cbbSBarry Smith /* Free cache space */ 41610a665d1SBarry Smith PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 417*bc5ccf88SSatish Balay ierr = StashReset_Private(&aij->stash); CHKERRQ(ierr); 4181eb62cbbSBarry Smith 4191eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 4201eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 4211eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 4221eb62cbbSBarry Smith aij->rmax = nmax; 4231eb62cbbSBarry Smith 4243a40ed3dSBarry Smith PetscFunctionReturn(0); 4251eb62cbbSBarry Smith } 4261eb62cbbSBarry Smith 4275615d1e5SSatish Balay #undef __FUNC__ 4285615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 4298f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 4301eb62cbbSBarry Smith { 43144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4321eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 433416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 434905e6a2fSBarry Smith int row,col,other_disassembled; 4351eb62cbbSBarry Smith Scalar *values,val; 43647794344SBarry Smith InsertMode addv = mat->insertmode; 4371eb62cbbSBarry Smith 4383a40ed3dSBarry Smith PetscFunctionBegin; 439b5bd3ad5SBarry Smith 4401eb62cbbSBarry Smith /* wait on receives */ 4411eb62cbbSBarry Smith while (count) { 442ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 4431eb62cbbSBarry Smith /* unpack receives into our local space */ 444d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 445ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 4461eb62cbbSBarry Smith n = n/3; 4471eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 448227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 449227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 4501eb62cbbSBarry Smith val = values[3*i+2]; 4511eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 4521eb62cbbSBarry Smith col -= aij->cstart; 4534a69d603SSatish Balay ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4543a40ed3dSBarry Smith } else { 45555a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 456905e6a2fSBarry Smith if (!aij->colmap) { 457905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 458905e6a2fSBarry Smith } 459b1fc9764SSatish Balay #if defined (USE_CTABLE) 460fa46199cSSatish Balay ierr = TableFind(aij->colmap,col+1,&col); CHKERRQ(ierr); 461fa46199cSSatish Balay col --; 462b1fc9764SSatish Balay #else 463905e6a2fSBarry Smith col = aij->colmap[col] - 1; 464b1fc9764SSatish Balay #endif 465ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4662493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 467227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 468d6dfbf8fSBarry Smith } 4699e25ed09SBarry Smith } 4704a69d603SSatish Balay ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4711eb62cbbSBarry Smith } 4721eb62cbbSBarry Smith } 4731eb62cbbSBarry Smith count--; 4741eb62cbbSBarry Smith } 475570da906SBarry Smith if (aij->recv_waits) PetscFree(aij->recv_waits); 476570da906SBarry Smith if (aij->rvalues) PetscFree(aij->rvalues); 4771eb62cbbSBarry Smith 4781eb62cbbSBarry Smith /* wait on sends */ 4791eb62cbbSBarry Smith if (aij->nsends) { 4800a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 481ca161407SBarry Smith ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 4820452661fSBarry Smith PetscFree(send_status); 4831eb62cbbSBarry Smith } 484b5bd3ad5SBarry Smith if (aij->send_waits) PetscFree(aij->send_waits); 485b5bd3ad5SBarry Smith if (aij->svalues) PetscFree(aij->svalues); 4861eb62cbbSBarry Smith 48778b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 48878b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 4891eb62cbbSBarry Smith 4902493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 4912493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 49241e46ba1SBarry Smith /* 49341e46ba1SBarry Smith if nonzero structure of submatrix B cannot change then we know that 49441e46ba1SBarry Smith no processor disassembled thus we can skip this stuff 49541e46ba1SBarry Smith */ 49641e46ba1SBarry Smith if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 497ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 498227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 4992493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 5002493cbb0SBarry Smith } 50141e46ba1SBarry Smith } 5022493cbb0SBarry Smith 5036d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 50478b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 5055e42470aSBarry Smith } 50678b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 50778b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 5085e42470aSBarry Smith 5097a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 5103a40ed3dSBarry Smith PetscFunctionReturn(0); 5118a729477SBarry Smith } 5128a729477SBarry Smith 513*bc5ccf88SSatish Balay #else 514*bc5ccf88SSatish Balay 515*bc5ccf88SSatish Balay #undef __FUNC__ 516*bc5ccf88SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 517*bc5ccf88SSatish Balay int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 518*bc5ccf88SSatish Balay { 519*bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 520*bc5ccf88SSatish Balay int ierr; 521*bc5ccf88SSatish Balay InsertMode addv; 522*bc5ccf88SSatish Balay 523*bc5ccf88SSatish Balay PetscFunctionBegin; 524*bc5ccf88SSatish Balay if (aij->donotstash) { 525*bc5ccf88SSatish Balay PetscFunctionReturn(0); 526*bc5ccf88SSatish Balay } 527*bc5ccf88SSatish Balay 528*bc5ccf88SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 529*bc5ccf88SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 530*bc5ccf88SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 531*bc5ccf88SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 532*bc5ccf88SSatish Balay } 533*bc5ccf88SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 534*bc5ccf88SSatish Balay 535*bc5ccf88SSatish Balay ierr = StashScatterBegin_Private(&aij->stash,aij->rowners); CHKERRQ(ierr); 536*bc5ccf88SSatish Balay /* Free cache space */ 537*bc5ccf88SSatish Balay PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 538*bc5ccf88SSatish Balay PetscFunctionReturn(0); 539*bc5ccf88SSatish Balay } 540*bc5ccf88SSatish Balay 541*bc5ccf88SSatish Balay 542*bc5ccf88SSatish Balay #undef __FUNC__ 543*bc5ccf88SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 544*bc5ccf88SSatish Balay int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 545*bc5ccf88SSatish Balay { 546*bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 547*bc5ccf88SSatish Balay int imdex,i,j,rstart,ncols,n,ierr; 548*bc5ccf88SSatish Balay int *row,*col,other_disassembled; 549*bc5ccf88SSatish Balay Scalar *val; 550*bc5ccf88SSatish Balay InsertMode addv = mat->insertmode; 551*bc5ccf88SSatish Balay 552*bc5ccf88SSatish Balay PetscFunctionBegin; 553*bc5ccf88SSatish Balay if (!aij->donotstash) { 554*bc5ccf88SSatish Balay ierr = StashScatterEnd_Private(&aij->stash); CHKERRQ(ierr); 555*bc5ccf88SSatish Balay for (imdex=0; imdex<aij->stash.nrecvs; imdex++ ) { 556*bc5ccf88SSatish Balay n = aij->stash.rdata[imdex].n; 557*bc5ccf88SSatish Balay row = aij->stash.rdata[imdex].i; 558*bc5ccf88SSatish Balay col = aij->stash.rdata[imdex].j; 559*bc5ccf88SSatish Balay val = aij->stash.rdata[imdex].a; 560*bc5ccf88SSatish Balay for ( i=0; i<n; ) { 561*bc5ccf88SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 562*bc5ccf88SSatish Balay for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 563*bc5ccf88SSatish Balay if (j < n) ncols = j-i; 564*bc5ccf88SSatish Balay else ncols = n-i; 565*bc5ccf88SSatish Balay /* Now assemble all these values with a single function call */ 566*bc5ccf88SSatish Balay ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr); 567*bc5ccf88SSatish Balay i = j; 568*bc5ccf88SSatish Balay } 569*bc5ccf88SSatish Balay } 570*bc5ccf88SSatish Balay ierr = StashReset_Private(&aij->stash); CHKERRQ(ierr); 571*bc5ccf88SSatish Balay } 572*bc5ccf88SSatish Balay 573*bc5ccf88SSatish Balay ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 574*bc5ccf88SSatish Balay ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 575*bc5ccf88SSatish Balay 576*bc5ccf88SSatish Balay /* determine if any processor has disassembled, if so we must 577*bc5ccf88SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 578*bc5ccf88SSatish Balay /* 579*bc5ccf88SSatish Balay if nonzero structure of submatrix B cannot change then we know that 580*bc5ccf88SSatish Balay no processor disassembled thus we can skip this stuff 581*bc5ccf88SSatish Balay */ 582*bc5ccf88SSatish Balay if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 583*bc5ccf88SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 584*bc5ccf88SSatish Balay if (mat->was_assembled && !other_disassembled) { 585*bc5ccf88SSatish Balay ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 586*bc5ccf88SSatish Balay } 587*bc5ccf88SSatish Balay } 588*bc5ccf88SSatish Balay 589*bc5ccf88SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590*bc5ccf88SSatish Balay ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 591*bc5ccf88SSatish Balay } 592*bc5ccf88SSatish Balay ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 593*bc5ccf88SSatish Balay ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 594*bc5ccf88SSatish Balay 595*bc5ccf88SSatish Balay if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 596*bc5ccf88SSatish Balay PetscFunctionReturn(0); 597*bc5ccf88SSatish Balay } 598*bc5ccf88SSatish Balay 599*bc5ccf88SSatish Balay #endif 600*bc5ccf88SSatish Balay 6015615d1e5SSatish Balay #undef __FUNC__ 6025615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 6038f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A) 6041eb62cbbSBarry Smith { 60544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 606dbd7a890SLois Curfman McInnes int ierr; 6073a40ed3dSBarry Smith 6083a40ed3dSBarry Smith PetscFunctionBegin; 60978b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 61078b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 6113a40ed3dSBarry Smith PetscFunctionReturn(0); 6121eb62cbbSBarry Smith } 6131eb62cbbSBarry Smith 6145615d1e5SSatish Balay #undef __FUNC__ 6155615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 6168f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 6171eb62cbbSBarry Smith { 61844a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 61917699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 6206a5c57faSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 62117699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 6225392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 6236a5c57faSSatish Balay int *lens,imdex,*lrows,*values,rstart=l->rstart; 624d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 6251eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 6261eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 6271eb62cbbSBarry Smith IS istmp; 6281eb62cbbSBarry Smith 6293a40ed3dSBarry Smith PetscFunctionBegin; 63077c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 63178b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 6321eb62cbbSBarry Smith 6331eb62cbbSBarry Smith /* first count number of contributors to each processor */ 6340452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 635cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 6360452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 6371eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 6381eb62cbbSBarry Smith idx = rows[i]; 6391eb62cbbSBarry Smith found = 0; 64017699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 6411eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 6421eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 6431eb62cbbSBarry Smith } 6441eb62cbbSBarry Smith } 645a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 6461eb62cbbSBarry Smith } 64717699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 6481eb62cbbSBarry Smith 6491eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 6500452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 651ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 65217699dbbSLois Curfman McInnes nrecvs = work[rank]; 653ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 65417699dbbSLois Curfman McInnes nmax = work[rank]; 6550452661fSBarry Smith PetscFree(work); 6561eb62cbbSBarry Smith 6571eb62cbbSBarry Smith /* post receives: */ 6583a40ed3dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 659ca161407SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 6601eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 661ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 6621eb62cbbSBarry Smith } 6631eb62cbbSBarry Smith 6641eb62cbbSBarry Smith /* do sends: 6651eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 6661eb62cbbSBarry Smith the ith processor 6671eb62cbbSBarry Smith */ 6680452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 6693a40ed3dSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 6700452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 6711eb62cbbSBarry Smith starts[0] = 0; 67217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 6731eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 6741eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 6751eb62cbbSBarry Smith } 6761eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 6771eb62cbbSBarry Smith 6781eb62cbbSBarry Smith starts[0] = 0; 67917699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 6801eb62cbbSBarry Smith count = 0; 68117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 6821eb62cbbSBarry Smith if (procs[i]) { 683ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 6841eb62cbbSBarry Smith } 6851eb62cbbSBarry Smith } 6860452661fSBarry Smith PetscFree(starts); 6871eb62cbbSBarry Smith 68817699dbbSLois Curfman McInnes base = owners[rank]; 6891eb62cbbSBarry Smith 6901eb62cbbSBarry Smith /* wait on receives */ 6910452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 6921eb62cbbSBarry Smith source = lens + nrecvs; 6931eb62cbbSBarry Smith count = nrecvs; slen = 0; 6941eb62cbbSBarry Smith while (count) { 695ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 6961eb62cbbSBarry Smith /* unpack receives into our local space */ 697ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 698d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 699d6dfbf8fSBarry Smith lens[imdex] = n; 7001eb62cbbSBarry Smith slen += n; 7011eb62cbbSBarry Smith count--; 7021eb62cbbSBarry Smith } 7030452661fSBarry Smith PetscFree(recv_waits); 7041eb62cbbSBarry Smith 7051eb62cbbSBarry Smith /* move the data into the send scatter */ 7060452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 7071eb62cbbSBarry Smith count = 0; 7081eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 7091eb62cbbSBarry Smith values = rvalues + i*nmax; 7101eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 7111eb62cbbSBarry Smith lrows[count++] = values[j] - base; 7121eb62cbbSBarry Smith } 7131eb62cbbSBarry Smith } 7140452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 7150452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 7161eb62cbbSBarry Smith 7171eb62cbbSBarry Smith /* actually zap the local rows */ 718029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 719464493b3SBarry Smith PLogObjectParent(A,istmp); 7206a5c57faSSatish Balay 7216eb55b6aSBarry Smith /* 7226eb55b6aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 7236eb55b6aSBarry Smith is square and the user wishes to set the diagonal we use seperate 7246eb55b6aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 7256eb55b6aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 7266eb55b6aSBarry Smith 7276eb55b6aSBarry Smith Contributed by: Mathew Knepley 7286eb55b6aSBarry Smith */ 729e2d53e46SBarry Smith /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 730e2d53e46SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 7316eb55b6aSBarry Smith if (diag && (l->A->M == l->A->N)) { 7326eb55b6aSBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 733e2d53e46SBarry Smith } else if (diag) { 734e2d53e46SBarry Smith ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 735fa46199cSSatish Balay if (((Mat_SeqAIJ*)l->A->data)->nonew) { 736fa46199cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 737fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 7386525c446SSatish Balay } 739e2d53e46SBarry Smith for ( i = 0; i < slen; i++ ) { 740e2d53e46SBarry Smith row = lrows[i] + rstart; 741e2d53e46SBarry Smith ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 742e2d53e46SBarry Smith } 743e2d53e46SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 744e2d53e46SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7456eb55b6aSBarry Smith } else { 7466a5c57faSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 7476eb55b6aSBarry Smith } 74878b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 7496a5c57faSSatish Balay PetscFree(lrows); 75072dacd9aSBarry Smith 7511eb62cbbSBarry Smith /* wait on sends */ 7521eb62cbbSBarry Smith if (nsends) { 753ca161407SBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 754ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 7550452661fSBarry Smith PetscFree(send_status); 7561eb62cbbSBarry Smith } 7570452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 7581eb62cbbSBarry Smith 7593a40ed3dSBarry Smith PetscFunctionReturn(0); 7601eb62cbbSBarry Smith } 7611eb62cbbSBarry Smith 7625615d1e5SSatish Balay #undef __FUNC__ 7635615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 7648f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 7651eb62cbbSBarry Smith { 766416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 767fbd6ef76SBarry Smith int ierr,nt; 768416022c9SBarry Smith 7693a40ed3dSBarry Smith PetscFunctionBegin; 770a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 771fbd6ef76SBarry Smith if (nt != a->n) { 772a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 773fbd6ef76SBarry Smith } 77443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 775f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 77643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 777f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 7783a40ed3dSBarry Smith PetscFunctionReturn(0); 7791eb62cbbSBarry Smith } 7801eb62cbbSBarry Smith 7815615d1e5SSatish Balay #undef __FUNC__ 7825615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 7838f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 784da3a660dSBarry Smith { 785416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 786da3a660dSBarry Smith int ierr; 7873a40ed3dSBarry Smith 7883a40ed3dSBarry Smith PetscFunctionBegin; 78943a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 790f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 79143a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 792f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 7933a40ed3dSBarry Smith PetscFunctionReturn(0); 794da3a660dSBarry Smith } 795da3a660dSBarry Smith 7965615d1e5SSatish Balay #undef __FUNC__ 7975615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 7988f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 799da3a660dSBarry Smith { 800416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 801da3a660dSBarry Smith int ierr; 802da3a660dSBarry Smith 8033a40ed3dSBarry Smith PetscFunctionBegin; 804da3a660dSBarry Smith /* do nondiagonal part */ 805f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 806da3a660dSBarry Smith /* send it on its way */ 807537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 808da3a660dSBarry Smith /* do local part */ 809f830108cSBarry Smith ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 810da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 811da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 812da3a660dSBarry Smith /* but is not perhaps always true. */ 813537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 8143a40ed3dSBarry Smith PetscFunctionReturn(0); 815da3a660dSBarry Smith } 816da3a660dSBarry Smith 8175615d1e5SSatish Balay #undef __FUNC__ 8185615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 8198f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 820da3a660dSBarry Smith { 821416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 822da3a660dSBarry Smith int ierr; 823da3a660dSBarry Smith 8243a40ed3dSBarry Smith PetscFunctionBegin; 825da3a660dSBarry Smith /* do nondiagonal part */ 826f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 827da3a660dSBarry Smith /* send it on its way */ 828537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 829da3a660dSBarry Smith /* do local part */ 830f830108cSBarry Smith ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 831da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 832da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 833da3a660dSBarry Smith /* but is not perhaps always true. */ 8340a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 8353a40ed3dSBarry Smith PetscFunctionReturn(0); 836da3a660dSBarry Smith } 837da3a660dSBarry Smith 8381eb62cbbSBarry Smith /* 8391eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 8401eb62cbbSBarry Smith diagonal block 8411eb62cbbSBarry Smith */ 8425615d1e5SSatish Balay #undef __FUNC__ 8435615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 8448f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 8451eb62cbbSBarry Smith { 8463a40ed3dSBarry Smith int ierr; 847416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 8483a40ed3dSBarry Smith 8493a40ed3dSBarry Smith PetscFunctionBegin; 850a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 8515baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 852a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 8533a40ed3dSBarry Smith } 8543a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 8553a40ed3dSBarry Smith PetscFunctionReturn(0); 8561eb62cbbSBarry Smith } 8571eb62cbbSBarry Smith 8585615d1e5SSatish Balay #undef __FUNC__ 8595615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 8608f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A) 861052efed2SBarry Smith { 862052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 863052efed2SBarry Smith int ierr; 8643a40ed3dSBarry Smith 8653a40ed3dSBarry Smith PetscFunctionBegin; 866052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 867052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 8683a40ed3dSBarry Smith PetscFunctionReturn(0); 869052efed2SBarry Smith } 870052efed2SBarry Smith 8715615d1e5SSatish Balay #undef __FUNC__ 872d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" 873e1311b90SBarry Smith int MatDestroy_MPIAIJ(Mat mat) 8741eb62cbbSBarry Smith { 87544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 8761eb62cbbSBarry Smith int ierr; 87783e2fdc7SBarry Smith 8783a40ed3dSBarry Smith PetscFunctionBegin; 87970429bc8SBarry Smith if (--mat->refct > 0) PetscFunctionReturn(0); 88070429bc8SBarry Smith 88170429bc8SBarry Smith if (mat->mapping) { 88270429bc8SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 88370429bc8SBarry Smith } 88470429bc8SBarry Smith if (mat->bmapping) { 88570429bc8SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 88670429bc8SBarry Smith } 88761b13de0SBarry Smith if (mat->rmap) { 88861b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 88961b13de0SBarry Smith } 89061b13de0SBarry Smith if (mat->cmap) { 89161b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 89261b13de0SBarry Smith } 8933a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 894e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 895a5a9c739SBarry Smith #endif 89683e2fdc7SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 8970452661fSBarry Smith PetscFree(aij->rowners); 89878b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 89978b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 900b1fc9764SSatish Balay #if defined (USE_CTABLE) 901b1fc9764SSatish Balay if (aij->colmap) TableDelete(aij->colmap); 902b1fc9764SSatish Balay #else 9030452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 904b1fc9764SSatish Balay #endif 9050452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 9061eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 907a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 9087a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 9090452661fSBarry Smith PetscFree(aij); 910a5a9c739SBarry Smith PLogObjectDestroy(mat); 9110452661fSBarry Smith PetscHeaderDestroy(mat); 9123a40ed3dSBarry Smith PetscFunctionReturn(0); 9131eb62cbbSBarry Smith } 914ee50ffe9SBarry Smith 9155615d1e5SSatish Balay #undef __FUNC__ 916d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" 917*bc5ccf88SSatish Balay int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 9181eb62cbbSBarry Smith { 919416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 920416022c9SBarry Smith int ierr; 921416022c9SBarry Smith 9223a40ed3dSBarry Smith PetscFunctionBegin; 92317699dbbSLois Curfman McInnes if (aij->size == 1) { 924416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 925416022c9SBarry Smith } 926a8c6a408SBarry Smith else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 9273a40ed3dSBarry Smith PetscFunctionReturn(0); 928416022c9SBarry Smith } 929416022c9SBarry Smith 9305615d1e5SSatish Balay #undef __FUNC__ 9317b2a1423SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 932*bc5ccf88SSatish Balay int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 933416022c9SBarry Smith { 93444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 935dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 93677ed5343SBarry Smith int ierr, format,shift = C->indexshift,rank = aij->rank ; 93777ed5343SBarry Smith int size = aij->size; 938d636dbe3SBarry Smith FILE *fd; 93919bcc07fSBarry Smith ViewerType vtype; 940416022c9SBarry Smith 9413a40ed3dSBarry Smith PetscFunctionBegin; 94219bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 9433f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 944d8467735SBarry Smith ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 9450a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 9464e220ebcSLois Curfman McInnes MatInfo info; 9474e220ebcSLois Curfman McInnes int flg; 948a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 94990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 9504e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 95195e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 95277c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 95395e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 9544e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 95595e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 9564e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 9574e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 9584e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 9594e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 9604e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 961a56f8943SBarry Smith fflush(fd); 96277c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 963a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 9643a40ed3dSBarry Smith PetscFunctionReturn(0); 9653a40ed3dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 9663a40ed3dSBarry Smith PetscFunctionReturn(0); 96708480c60SBarry Smith } 9683f1db9ecSBarry Smith } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 96919bcc07fSBarry Smith Draw draw; 97019bcc07fSBarry Smith PetscTruth isnull; 97177ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 9723a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 97319bcc07fSBarry Smith } 97419bcc07fSBarry Smith 97517699dbbSLois Curfman McInnes if (size == 1) { 97678b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 9773a40ed3dSBarry Smith } else { 97895373324SBarry Smith /* assemble the entire matrix onto first processor. */ 97995373324SBarry Smith Mat A; 980ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 9812eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 98295373324SBarry Smith Scalar *a; 9832ee70a88SLois Curfman McInnes 98417699dbbSLois Curfman McInnes if (!rank) { 98555843e3eSBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 9863a40ed3dSBarry Smith } else { 98755843e3eSBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 98895373324SBarry Smith } 989464493b3SBarry Smith PLogObjectParent(mat,A); 990416022c9SBarry Smith 99195373324SBarry Smith /* copy over the A part */ 992ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 9932ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 99495373324SBarry Smith row = aij->rstart; 995dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 99695373324SBarry Smith for ( i=0; i<m; i++ ) { 997416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 99895373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 99995373324SBarry Smith } 10002ee70a88SLois Curfman McInnes aj = Aloc->j; 1001dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 100295373324SBarry Smith 100395373324SBarry Smith /* copy over the B part */ 1004ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 10052ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 100695373324SBarry Smith row = aij->rstart; 10070452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 1008dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 100995373324SBarry Smith for ( i=0; i<m; i++ ) { 1010416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 101195373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 101295373324SBarry Smith } 10130452661fSBarry Smith PetscFree(ct); 10146d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10156d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 101655843e3eSBarry Smith /* 101755843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 101855843e3eSBarry Smith synchronized across all processors that share the Draw object 101955843e3eSBarry Smith */ 10203f1db9ecSBarry Smith if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 102178b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 102295373324SBarry Smith } 102378b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 102495373324SBarry Smith } 10253a40ed3dSBarry Smith PetscFunctionReturn(0); 10261eb62cbbSBarry Smith } 10271eb62cbbSBarry Smith 10285615d1e5SSatish Balay #undef __FUNC__ 1029d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ" 1030e1311b90SBarry Smith int MatView_MPIAIJ(Mat mat,Viewer viewer) 1031416022c9SBarry Smith { 1032416022c9SBarry Smith int ierr; 103319bcc07fSBarry Smith ViewerType vtype; 1034416022c9SBarry Smith 10353a40ed3dSBarry Smith PetscFunctionBegin; 103619bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 10373f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 10387b2a1423SBarry Smith PetscTypeCompare(vtype,SOCKET_VIEWER)) { 10397b2a1423SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 10403f1db9ecSBarry Smith } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 10413a40ed3dSBarry Smith ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 10425cd90555SBarry Smith } else { 10435cd90555SBarry Smith SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1044416022c9SBarry Smith } 10453a40ed3dSBarry Smith PetscFunctionReturn(0); 1046416022c9SBarry Smith } 1047416022c9SBarry Smith 10481eb62cbbSBarry Smith /* 10491eb62cbbSBarry Smith This has to provide several versions. 10501eb62cbbSBarry Smith 10511eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 10521eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 1053d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 10541eb62cbbSBarry Smith */ 10555615d1e5SSatish Balay #undef __FUNC__ 10565615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 10578f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 1058dbb450caSBarry Smith double fshift,int its,Vec xx) 10598a729477SBarry Smith { 106044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1061d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 1062ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 1063c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 10646abc6512SBarry Smith int ierr,*idx, *diag; 1065416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 10668a729477SBarry Smith 10673a40ed3dSBarry Smith PetscFunctionBegin; 10682e8a6d31SBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 10692e8a6d31SBarry Smith ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 10702e8a6d31SBarry Smith ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 1071dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 1072dbb450caSBarry Smith ls = ls + shift; 107383e2fdc7SBarry Smith if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 1074d6dfbf8fSBarry Smith diag = A->diag; 1075c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1076da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 1077f830108cSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 10782e8a6d31SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 10792e8a6d31SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 10802e8a6d31SBarry Smith ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 10813a40ed3dSBarry Smith PetscFunctionReturn(0); 1082da3a660dSBarry Smith } 10833a40ed3dSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 10843a40ed3dSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1085d6dfbf8fSBarry Smith while (its--) { 1086d6dfbf8fSBarry Smith /* go down through the rows */ 1087d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 1088d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 10894d197716SBarry Smith PLogFlops(4*n+3); 1090dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 1091dbb450caSBarry Smith v = A->a + A->i[i] + shift; 1092d6dfbf8fSBarry Smith sum = b[i]; 1093d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 1094dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1095d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1096dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1097dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1098d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 109955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1100d6dfbf8fSBarry Smith } 1101d6dfbf8fSBarry Smith /* come up through the rows */ 1102d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 1103d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 11044d197716SBarry Smith PLogFlops(4*n+3) 1105dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 1106dbb450caSBarry Smith v = A->a + A->i[i] + shift; 1107d6dfbf8fSBarry Smith sum = b[i]; 1108d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 1109dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1110d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1111dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1112dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1113d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 111455a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1115d6dfbf8fSBarry Smith } 1116d6dfbf8fSBarry Smith } 11173a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1118da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 1119f830108cSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 11202e8a6d31SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 11212e8a6d31SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 11222e8a6d31SBarry Smith ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 11233a40ed3dSBarry Smith PetscFunctionReturn(0); 1124da3a660dSBarry Smith } 11253a40ed3dSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 11263a40ed3dSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1127d6dfbf8fSBarry Smith while (its--) { 1128d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 1129d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 11304d197716SBarry Smith PLogFlops(4*n+3); 1131dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 1132dbb450caSBarry Smith v = A->a + A->i[i] + shift; 1133d6dfbf8fSBarry Smith sum = b[i]; 1134d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 1135dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1136d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1137dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1138dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1139d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 114055a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1141d6dfbf8fSBarry Smith } 1142d6dfbf8fSBarry Smith } 11433a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1144da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 1145f830108cSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 11462e8a6d31SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 11472e8a6d31SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 11482e8a6d31SBarry Smith ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 11493a40ed3dSBarry Smith PetscFunctionReturn(0); 1150da3a660dSBarry Smith } 11512e8a6d31SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 11522e8a6d31SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1153d6dfbf8fSBarry Smith while (its--) { 1154d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 1155d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 11564d197716SBarry Smith PLogFlops(4*n+3); 1157dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 1158dbb450caSBarry Smith v = A->a + A->i[i] + shift; 1159d6dfbf8fSBarry Smith sum = b[i]; 1160d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 1161dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1162d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1163dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1164dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1165d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 116655a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1167d6dfbf8fSBarry Smith } 1168d6dfbf8fSBarry Smith } 11693a40ed3dSBarry Smith } else { 1170a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1171c16cb8f2SBarry Smith } 11722e8a6d31SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 11732e8a6d31SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 11742e8a6d31SBarry Smith ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 11753a40ed3dSBarry Smith PetscFunctionReturn(0); 11768a729477SBarry Smith } 1177a66be287SLois Curfman McInnes 11785615d1e5SSatish Balay #undef __FUNC__ 1179d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" 11808f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1181a66be287SLois Curfman McInnes { 1182a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1183a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 11844e220ebcSLois Curfman McInnes int ierr; 11854e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 1186a66be287SLois Curfman McInnes 11873a40ed3dSBarry Smith PetscFunctionBegin; 11884e220ebcSLois Curfman McInnes info->block_size = 1.0; 11894e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 11904e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 11914e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 11924e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 11934e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 11944e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 1195a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 11964e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 11974e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 11984e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 11994e220ebcSLois Curfman McInnes info->memory = isend[3]; 12004e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1201a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1202ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 12034e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 12044e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 12054e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 12064e220ebcSLois Curfman McInnes info->memory = irecv[3]; 12074e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1208a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1209ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 12104e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 12114e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 12124e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 12134e220ebcSLois Curfman McInnes info->memory = irecv[3]; 12144e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1215a66be287SLois Curfman McInnes } 12164e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 12174e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 12184e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 12198d700155SBarry Smith info->rows_global = (double)mat->M; 12208d700155SBarry Smith info->columns_global = (double)mat->N; 12218d700155SBarry Smith info->rows_local = (double)mat->m; 12228d700155SBarry Smith info->columns_local = (double)mat->N; 12234e220ebcSLois Curfman McInnes 12243a40ed3dSBarry Smith PetscFunctionReturn(0); 1225a66be287SLois Curfman McInnes } 1226a66be287SLois Curfman McInnes 12275615d1e5SSatish Balay #undef __FUNC__ 1228d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" 12298f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op) 1230c74985f6SBarry Smith { 1231c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 12321dee9f54SBarry Smith int ierr; 1233c74985f6SBarry Smith 12343a40ed3dSBarry Smith PetscFunctionBegin; 12356d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 12366d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 12376da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1238c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 12394787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 12404787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 12411dee9f54SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 12421dee9f54SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1243b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1244aeafbbfcSLois Curfman McInnes a->roworiented = 1; 12451dee9f54SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 12461dee9f54SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1247b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 12486da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 12496d4a8577SBarry Smith op == MAT_SYMMETRIC || 12506d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 12516d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 1252981c4779SBarry Smith PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 12536d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 12544b0e389bSBarry Smith a->roworiented = 0; 12551dee9f54SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 12561dee9f54SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 12572b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 125890f02eecSBarry Smith a->donotstash = 1; 12593a40ed3dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS){ 12603a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 12613a40ed3dSBarry Smith } else { 12623a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 12633a40ed3dSBarry Smith } 12643a40ed3dSBarry Smith PetscFunctionReturn(0); 1265c74985f6SBarry Smith } 1266c74985f6SBarry Smith 12675615d1e5SSatish Balay #undef __FUNC__ 1268d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" 12698f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1270c74985f6SBarry Smith { 127144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 12723a40ed3dSBarry Smith 12733a40ed3dSBarry Smith PetscFunctionBegin; 12740752156aSBarry Smith if (m) *m = mat->M; 12750752156aSBarry Smith if (n) *n = mat->N; 12763a40ed3dSBarry Smith PetscFunctionReturn(0); 1277c74985f6SBarry Smith } 1278c74985f6SBarry Smith 12795615d1e5SSatish Balay #undef __FUNC__ 1280d4bb536fSBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" 12818f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1282c74985f6SBarry Smith { 128344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 12843a40ed3dSBarry Smith 12853a40ed3dSBarry Smith PetscFunctionBegin; 12860752156aSBarry Smith if (m) *m = mat->m; 1287f830108cSBarry Smith if (n) *n = mat->n; 12883a40ed3dSBarry Smith PetscFunctionReturn(0); 1289c74985f6SBarry Smith } 1290c74985f6SBarry Smith 12915615d1e5SSatish Balay #undef __FUNC__ 1292d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 12938f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1294c74985f6SBarry Smith { 129544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 12963a40ed3dSBarry Smith 12973a40ed3dSBarry Smith PetscFunctionBegin; 1298c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 12993a40ed3dSBarry Smith PetscFunctionReturn(0); 1300c74985f6SBarry Smith } 1301c74985f6SBarry Smith 13025615d1e5SSatish Balay #undef __FUNC__ 13035615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 13046d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 130539e00950SLois Curfman McInnes { 1306154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 130770f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1308154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1309154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 131070f0671dSBarry Smith int *cmap, *idx_p; 131139e00950SLois Curfman McInnes 13123a40ed3dSBarry Smith PetscFunctionBegin; 1313a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 13147a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 13157a0afa10SBarry Smith 131670f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 13177a0afa10SBarry Smith /* 13187a0afa10SBarry Smith allocate enough space to hold information from the longest row. 13197a0afa10SBarry Smith */ 13207a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1321c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1322c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 13237a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 13247a0afa10SBarry Smith if (max < tmp) { max = tmp; } 13257a0afa10SBarry Smith } 13263f97c4b0SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 13277a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 13287a0afa10SBarry Smith } 13297a0afa10SBarry Smith 1330a8c6a408SBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1331abc0e9e4SLois Curfman McInnes lrow = row - rstart; 133239e00950SLois Curfman McInnes 1333154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1334154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1335154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 1336f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1337f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1338154123eaSLois Curfman McInnes nztot = nzA + nzB; 1339154123eaSLois Curfman McInnes 134070f0671dSBarry Smith cmap = mat->garray; 1341154123eaSLois Curfman McInnes if (v || idx) { 1342154123eaSLois Curfman McInnes if (nztot) { 1343154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 134470f0671dSBarry Smith int imark = -1; 1345154123eaSLois Curfman McInnes if (v) { 134670f0671dSBarry Smith *v = v_p = mat->rowvalues; 134739e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 134870f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1349154123eaSLois Curfman McInnes else break; 1350154123eaSLois Curfman McInnes } 1351154123eaSLois Curfman McInnes imark = i; 135270f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 135370f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1354154123eaSLois Curfman McInnes } 1355154123eaSLois Curfman McInnes if (idx) { 135670f0671dSBarry Smith *idx = idx_p = mat->rowindices; 135770f0671dSBarry Smith if (imark > -1) { 135870f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 135970f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 136070f0671dSBarry Smith } 136170f0671dSBarry Smith } else { 1362154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 136370f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1364154123eaSLois Curfman McInnes else break; 1365154123eaSLois Curfman McInnes } 1366154123eaSLois Curfman McInnes imark = i; 136770f0671dSBarry Smith } 136870f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 136970f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 137039e00950SLois Curfman McInnes } 13713f97c4b0SBarry Smith } else { 13721ca473b0SSatish Balay if (idx) *idx = 0; 13731ca473b0SSatish Balay if (v) *v = 0; 13741ca473b0SSatish Balay } 1375154123eaSLois Curfman McInnes } 137639e00950SLois Curfman McInnes *nz = nztot; 1377f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1378f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 13793a40ed3dSBarry Smith PetscFunctionReturn(0); 138039e00950SLois Curfman McInnes } 138139e00950SLois Curfman McInnes 13825615d1e5SSatish Balay #undef __FUNC__ 1383d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" 13846d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 138539e00950SLois Curfman McInnes { 13867a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13873a40ed3dSBarry Smith 13883a40ed3dSBarry Smith PetscFunctionBegin; 13897a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1390a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 13917a0afa10SBarry Smith } 13927a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 13933a40ed3dSBarry Smith PetscFunctionReturn(0); 139439e00950SLois Curfman McInnes } 139539e00950SLois Curfman McInnes 13965615d1e5SSatish Balay #undef __FUNC__ 13975615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 13988f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1399855ac2c5SLois Curfman McInnes { 1400855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1401ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1402416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1403416022c9SBarry Smith double sum = 0.0; 140404ca555eSLois Curfman McInnes Scalar *v; 140504ca555eSLois Curfman McInnes 14063a40ed3dSBarry Smith PetscFunctionBegin; 140717699dbbSLois Curfman McInnes if (aij->size == 1) { 140814183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 140937fa93a5SLois Curfman McInnes } else { 141004ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 141104ca555eSLois Curfman McInnes v = amat->a; 141204ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 14133a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 1414e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 141504ca555eSLois Curfman McInnes #else 141604ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 141704ca555eSLois Curfman McInnes #endif 141804ca555eSLois Curfman McInnes } 141904ca555eSLois Curfman McInnes v = bmat->a; 142004ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 14213a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 1422e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 142304ca555eSLois Curfman McInnes #else 142404ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 142504ca555eSLois Curfman McInnes #endif 142604ca555eSLois Curfman McInnes } 1427ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 142804ca555eSLois Curfman McInnes *norm = sqrt(*norm); 14293a40ed3dSBarry Smith } else if (type == NORM_1) { /* max column norm */ 143004ca555eSLois Curfman McInnes double *tmp, *tmp2; 143104ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 1432758f045eSSatish Balay tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1433758f045eSSatish Balay tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1434cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 143504ca555eSLois Curfman McInnes *norm = 0.0; 143604ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 143704ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1438579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 143904ca555eSLois Curfman McInnes } 144004ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 144104ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1442579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 144304ca555eSLois Curfman McInnes } 1444ca161407SBarry Smith ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 144504ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 144604ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 144704ca555eSLois Curfman McInnes } 14480452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 14493a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 1450515d9167SLois Curfman McInnes double ntemp = 0.0; 145104ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1452dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 145304ca555eSLois Curfman McInnes sum = 0.0; 145404ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1455cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 145604ca555eSLois Curfman McInnes } 1457dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 145804ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1459cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 146004ca555eSLois Curfman McInnes } 1461515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 146204ca555eSLois Curfman McInnes } 1463ca161407SBarry Smith ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1464ca161407SBarry Smith } else { 1465a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 146604ca555eSLois Curfman McInnes } 146737fa93a5SLois Curfman McInnes } 14683a40ed3dSBarry Smith PetscFunctionReturn(0); 1469855ac2c5SLois Curfman McInnes } 1470855ac2c5SLois Curfman McInnes 14715615d1e5SSatish Balay #undef __FUNC__ 14725615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 14738f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1474b7c46309SBarry Smith { 1475b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1476dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1477416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1478b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 14793a40ed3dSBarry Smith Mat B; 1480b7c46309SBarry Smith Scalar *array; 1481b7c46309SBarry Smith 14823a40ed3dSBarry Smith PetscFunctionBegin; 1483d4bb536fSBarry Smith if (matout == PETSC_NULL && M != N) { 1484a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1485d4bb536fSBarry Smith } 1486d4bb536fSBarry Smith 1487d4bb536fSBarry Smith ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1488b7c46309SBarry Smith 1489b7c46309SBarry Smith /* copy over the A part */ 1490ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1491b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1492b7c46309SBarry Smith row = a->rstart; 1493dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1494b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1495416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1496b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1497b7c46309SBarry Smith } 1498b7c46309SBarry Smith aj = Aloc->j; 14994af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1500b7c46309SBarry Smith 1501b7c46309SBarry Smith /* copy over the B part */ 1502ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1503b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1504b7c46309SBarry Smith row = a->rstart; 15050452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1506dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1507b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1508416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1509b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1510b7c46309SBarry Smith } 15110452661fSBarry Smith PetscFree(ct); 15126d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15136d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15143638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 15150de55854SLois Curfman McInnes *matout = B; 15160de55854SLois Curfman McInnes } else { 1517f830108cSBarry Smith PetscOps *Abops; 1518f830108cSBarry Smith struct _MatOps *Aops; 1519f830108cSBarry Smith 15200de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 15210452661fSBarry Smith PetscFree(a->rowners); 15220de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 15230de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 1524b1fc9764SSatish Balay #if defined (USE_CTABLE) 1525b1fc9764SSatish Balay if (a->colmap) TableDelete(a->colmap); 1526b1fc9764SSatish Balay #else 15270452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 1528b1fc9764SSatish Balay #endif 15290452661fSBarry Smith if (a->garray) PetscFree(a->garray); 15300de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1531a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 15320452661fSBarry Smith PetscFree(a); 1533f830108cSBarry Smith 1534f830108cSBarry Smith /* 1535f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1536f830108cSBarry Smith A pointers for the bops and ops but copy everything 1537f830108cSBarry Smith else from C. 1538f830108cSBarry Smith */ 1539f830108cSBarry Smith Abops = A->bops; 1540f830108cSBarry Smith Aops = A->ops; 1541f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1542f830108cSBarry Smith A->bops = Abops; 1543f830108cSBarry Smith A->ops = Aops; 15440452661fSBarry Smith PetscHeaderDestroy(B); 15450de55854SLois Curfman McInnes } 15463a40ed3dSBarry Smith PetscFunctionReturn(0); 1547b7c46309SBarry Smith } 1548b7c46309SBarry Smith 15495615d1e5SSatish Balay #undef __FUNC__ 15505615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 15514b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1552a008b906SSatish Balay { 15534b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 15544b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1555a008b906SSatish Balay int ierr,s1,s2,s3; 1556a008b906SSatish Balay 15573a40ed3dSBarry Smith PetscFunctionBegin; 15584b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 15594b967eb1SSatish Balay if (rr) { 1560e1311b90SBarry Smith ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1561a8c6a408SBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 15624b967eb1SSatish Balay /* Overlap communication with computation. */ 156343a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1564a008b906SSatish Balay } 15654b967eb1SSatish Balay if (ll) { 1566e1311b90SBarry Smith ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1567a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1568f830108cSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 15694b967eb1SSatish Balay } 15704b967eb1SSatish Balay /* scale the diagonal block */ 1571f830108cSBarry Smith ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 15724b967eb1SSatish Balay 15734b967eb1SSatish Balay if (rr) { 15744b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 157543a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1576f830108cSBarry Smith ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 15774b967eb1SSatish Balay } 15784b967eb1SSatish Balay 15793a40ed3dSBarry Smith PetscFunctionReturn(0); 1580a008b906SSatish Balay } 1581a008b906SSatish Balay 1582a008b906SSatish Balay 15835615d1e5SSatish Balay #undef __FUNC__ 1584d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" 15858f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A) 1586682d7d0cSBarry Smith { 1587682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 15883a40ed3dSBarry Smith int ierr; 1589682d7d0cSBarry Smith 15903a40ed3dSBarry Smith PetscFunctionBegin; 15913a40ed3dSBarry Smith if (!a->rank) { 15923a40ed3dSBarry Smith ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 15933a40ed3dSBarry Smith } 15943a40ed3dSBarry Smith PetscFunctionReturn(0); 1595682d7d0cSBarry Smith } 1596682d7d0cSBarry Smith 15975615d1e5SSatish Balay #undef __FUNC__ 1598d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" 15998f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 16005a838052SSatish Balay { 16013a40ed3dSBarry Smith PetscFunctionBegin; 16025a838052SSatish Balay *bs = 1; 16033a40ed3dSBarry Smith PetscFunctionReturn(0); 16045a838052SSatish Balay } 16055615d1e5SSatish Balay #undef __FUNC__ 1606d4bb536fSBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" 16078f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A) 1608bb5a7306SBarry Smith { 1609bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1610bb5a7306SBarry Smith int ierr; 16113a40ed3dSBarry Smith 16123a40ed3dSBarry Smith PetscFunctionBegin; 1613bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 16143a40ed3dSBarry Smith PetscFunctionReturn(0); 1615bb5a7306SBarry Smith } 1616bb5a7306SBarry Smith 1617d4bb536fSBarry Smith #undef __FUNC__ 1618d4bb536fSBarry Smith #define __FUNC__ "MatEqual_MPIAIJ" 1619d4bb536fSBarry Smith int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1620d4bb536fSBarry Smith { 1621d4bb536fSBarry Smith Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1622d4bb536fSBarry Smith Mat a, b, c, d; 1623d4bb536fSBarry Smith PetscTruth flg; 1624d4bb536fSBarry Smith int ierr; 1625d4bb536fSBarry Smith 16263a40ed3dSBarry Smith PetscFunctionBegin; 1627a8c6a408SBarry Smith if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1628d4bb536fSBarry Smith a = matA->A; b = matA->B; 1629d4bb536fSBarry Smith c = matB->A; d = matB->B; 1630d4bb536fSBarry Smith 1631d4bb536fSBarry Smith ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1632d4bb536fSBarry Smith if (flg == PETSC_TRUE) { 1633d4bb536fSBarry Smith ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1634d4bb536fSBarry Smith } 1635ca161407SBarry Smith ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 16363a40ed3dSBarry Smith PetscFunctionReturn(0); 1637d4bb536fSBarry Smith } 1638d4bb536fSBarry Smith 1639cb5b572fSBarry Smith #undef __FUNC__ 1640cb5b572fSBarry Smith #define __FUNC__ "MatCopy_MPIAIJ" 1641cb5b572fSBarry Smith int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1642cb5b572fSBarry Smith { 1643cb5b572fSBarry Smith int ierr; 1644cb5b572fSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1645cb5b572fSBarry Smith Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1646cb5b572fSBarry Smith 1647cb5b572fSBarry Smith PetscFunctionBegin; 1648cb5b572fSBarry Smith if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1649cb5b572fSBarry Smith /* because of the column compression in the off-processor part of the matrix a->B, 1650cb5b572fSBarry Smith the number of columns in a->B and b->B may be different, hence we cannot call 1651cb5b572fSBarry Smith the MatCopy() directly on the two parts. If need be, we can provide a more 1652cb5b572fSBarry Smith efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1653cb5b572fSBarry Smith then copying the submatrices */ 1654cb5b572fSBarry Smith ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1655cb5b572fSBarry Smith } else { 1656cb5b572fSBarry Smith ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1657cb5b572fSBarry Smith ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1658cb5b572fSBarry Smith } 1659cb5b572fSBarry Smith PetscFunctionReturn(0); 1660cb5b572fSBarry Smith } 1661cb5b572fSBarry Smith 16622e8a6d31SBarry Smith extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 16632f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 16640a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 16657b2a1423SBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 16667b2a1423SBarry Smith extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 166700e6dbe6SBarry Smith 16688a729477SBarry Smith /* -------------------------------------------------------------------*/ 1669cda55fadSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1670cda55fadSBarry Smith MatGetRow_MPIAIJ, 1671cda55fadSBarry Smith MatRestoreRow_MPIAIJ, 1672cda55fadSBarry Smith MatMult_MPIAIJ, 1673cda55fadSBarry Smith MatMultAdd_MPIAIJ, 1674cda55fadSBarry Smith MatMultTrans_MPIAIJ, 1675cda55fadSBarry Smith MatMultTransAdd_MPIAIJ, 1676cda55fadSBarry Smith 0, 1677cda55fadSBarry Smith 0, 1678cda55fadSBarry Smith 0, 1679cda55fadSBarry Smith 0, 1680cda55fadSBarry Smith 0, 1681cda55fadSBarry Smith 0, 168244a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1683b7c46309SBarry Smith MatTranspose_MPIAIJ, 1684cda55fadSBarry Smith MatGetInfo_MPIAIJ, 1685cda55fadSBarry Smith MatEqual_MPIAIJ, 1686cda55fadSBarry Smith MatGetDiagonal_MPIAIJ, 1687cda55fadSBarry Smith MatDiagonalScale_MPIAIJ, 1688cda55fadSBarry Smith MatNorm_MPIAIJ, 1689cda55fadSBarry Smith MatAssemblyBegin_MPIAIJ, 1690cda55fadSBarry Smith MatAssemblyEnd_MPIAIJ, 16911eb62cbbSBarry Smith 0, 1692cda55fadSBarry Smith MatSetOption_MPIAIJ, 1693cda55fadSBarry Smith MatZeroEntries_MPIAIJ, 1694cda55fadSBarry Smith MatZeroRows_MPIAIJ, 1695cda55fadSBarry Smith 0, 1696cda55fadSBarry Smith 0, 1697cda55fadSBarry Smith 0, 1698cda55fadSBarry Smith 0, 1699cda55fadSBarry Smith MatGetSize_MPIAIJ, 1700cda55fadSBarry Smith MatGetLocalSize_MPIAIJ, 1701cda55fadSBarry Smith MatGetOwnershipRange_MPIAIJ, 1702cda55fadSBarry Smith 0, 1703cda55fadSBarry Smith 0, 1704cda55fadSBarry Smith 0, 1705cda55fadSBarry Smith 0, 17062e8a6d31SBarry Smith MatDuplicate_MPIAIJ, 1707cda55fadSBarry Smith 0, 1708cda55fadSBarry Smith 0, 1709cda55fadSBarry Smith 0, 1710cda55fadSBarry Smith 0, 1711cda55fadSBarry Smith 0, 1712cda55fadSBarry Smith MatGetSubMatrices_MPIAIJ, 1713cda55fadSBarry Smith MatIncreaseOverlap_MPIAIJ, 1714cda55fadSBarry Smith MatGetValues_MPIAIJ, 1715cb5b572fSBarry Smith MatCopy_MPIAIJ, 1716052efed2SBarry Smith MatPrintHelp_MPIAIJ, 1717cda55fadSBarry Smith MatScale_MPIAIJ, 1718cda55fadSBarry Smith 0, 1719cda55fadSBarry Smith 0, 1720cda55fadSBarry Smith 0, 1721cda55fadSBarry Smith MatGetBlockSize_MPIAIJ, 1722cda55fadSBarry Smith 0, 1723cda55fadSBarry Smith 0, 1724cda55fadSBarry Smith 0, 1725cda55fadSBarry Smith 0, 1726cda55fadSBarry Smith MatFDColoringCreate_MPIAIJ, 1727cda55fadSBarry Smith 0, 1728cda55fadSBarry Smith MatSetUnfactored_MPIAIJ, 1729cda55fadSBarry Smith 0, 1730cda55fadSBarry Smith 0, 1731cda55fadSBarry Smith MatGetSubMatrix_MPIAIJ, 1732cda55fadSBarry Smith 0, 1733cda55fadSBarry Smith 0, 1734cda55fadSBarry Smith MatGetMaps_Petsc}; 173536ce4990SBarry Smith 17362e8a6d31SBarry Smith /* ----------------------------------------------------------------------------------------*/ 17372e8a6d31SBarry Smith 1738fb2e594dSBarry Smith EXTERN_C_BEGIN 17392e8a6d31SBarry Smith #undef __FUNC__ 17402e8a6d31SBarry Smith #define __FUNC__ "MatStoreValues_MPIAIJ" 17412e8a6d31SBarry Smith int MatStoreValues_MPIAIJ(Mat mat) 17422e8a6d31SBarry Smith { 17432e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 17442e8a6d31SBarry Smith int ierr; 17452e8a6d31SBarry Smith 17462e8a6d31SBarry Smith PetscFunctionBegin; 17472e8a6d31SBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 17482e8a6d31SBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 17492e8a6d31SBarry Smith PetscFunctionReturn(0); 17502e8a6d31SBarry Smith } 1751fb2e594dSBarry Smith EXTERN_C_END 17522e8a6d31SBarry Smith 1753fb2e594dSBarry Smith EXTERN_C_BEGIN 17542e8a6d31SBarry Smith #undef __FUNC__ 17552e8a6d31SBarry Smith #define __FUNC__ "MatRetrieveValues_MPIAIJ" 17562e8a6d31SBarry Smith int MatRetrieveValues_MPIAIJ(Mat mat) 17572e8a6d31SBarry Smith { 17582e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 17592e8a6d31SBarry Smith int ierr; 17602e8a6d31SBarry Smith 17612e8a6d31SBarry Smith PetscFunctionBegin; 17622e8a6d31SBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 17632e8a6d31SBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 17642e8a6d31SBarry Smith PetscFunctionReturn(0); 17652e8a6d31SBarry Smith } 1766fb2e594dSBarry Smith EXTERN_C_END 17678a729477SBarry Smith 176827508adbSBarry Smith #include "pc.h" 176927508adbSBarry Smith EXTERN_C_BEGIN 17705ef9f2a5SBarry Smith extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 177127508adbSBarry Smith EXTERN_C_END 177227508adbSBarry Smith 17735615d1e5SSatish Balay #undef __FUNC__ 17745615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 17751987afe7SBarry Smith /*@C 1776ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 17773a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 17783a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 17793a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 17803a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 17818a729477SBarry Smith 1782db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1783db81eaa0SLois Curfman McInnes 17848a729477SBarry Smith Input Parameters: 1785db81eaa0SLois Curfman McInnes + comm - MPI communicator 17867d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 178792e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 178892e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 17891a3896d6SBarry Smith . n - This value should be the same as the local size used in creating the 17901a3896d6SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 17911a3896d6SBarry Smith calculated if N is given) For square matrices n is almost always m. 179260d380a7SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 179360d380a7SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1794af1d9917SSatish Balay . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1795af1d9917SSatish Balay (same value is used for all local rows) 1796af1d9917SSatish Balay . d_nnz - array containing the number of nonzeros in the various rows of the 1797af1d9917SSatish Balay DIAGONAL portion of the local submatrix (possibly different for each row) 1798af1d9917SSatish Balay or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1799af1d9917SSatish Balay The size of this array is equal to the number of local rows, i.e 'm'. 1800af1d9917SSatish Balay You must leave room for the diagonal entry even if it is zero. 1801af1d9917SSatish Balay . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1802af1d9917SSatish Balay submatrix (same value is used for all local rows). 1803af1d9917SSatish Balay - o_nnz - array containing the number of nonzeros in the various rows of the 1804af1d9917SSatish Balay OFF-DIAGONAL portion of the local submatrix (possibly different for 1805af1d9917SSatish Balay each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1806af1d9917SSatish Balay structure. The size of this array is equal to the number 1807af1d9917SSatish Balay of local rows, i.e 'm'. 18088a729477SBarry Smith 1809ff756334SLois Curfman McInnes Output Parameter: 181044cd7ae7SLois Curfman McInnes . A - the matrix 18118a729477SBarry Smith 1812b259b22eSLois Curfman McInnes Notes: 1813af1d9917SSatish Balay m,n,M,N parameters specify the size of the matrix, and its partitioning across 1814af1d9917SSatish Balay processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1815af1d9917SSatish Balay storage requirements for this matrix. 1816af1d9917SSatish Balay 1817af1d9917SSatish Balay If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1818af1d9917SSatish Balay processor than it must be used on all processors that share the object for 1819af1d9917SSatish Balay that argument. 1820be79a94dSBarry Smith 1821ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1822ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 18230002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 18240002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1825ff756334SLois Curfman McInnes 1826ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1827ff756334SLois Curfman McInnes (possibly both). 1828ff756334SLois Curfman McInnes 1829af1d9917SSatish Balay The parallel matrix is partitioned such that the first m0 rows belong to 1830af1d9917SSatish Balay process 0, the next m1 rows belong to process 1, the next m2 rows belong 1831af1d9917SSatish Balay to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1832af1d9917SSatish Balay 1833af1d9917SSatish Balay The DIAGONAL portion of the local submatrix of a processor can be defined 1834af1d9917SSatish Balay as the submatrix which is obtained by extraction the part corresponding 1835af1d9917SSatish Balay to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1836af1d9917SSatish Balay first row that belongs to the processor, and r2 is the last row belonging 1837af1d9917SSatish Balay to the this processor. This is a square mxm matrix. The remaining portion 1838af1d9917SSatish Balay of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1839af1d9917SSatish Balay 1840af1d9917SSatish Balay If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1841af1d9917SSatish Balay 18425511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 18435511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 18445511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 18455511cfe3SLois Curfman McInnes 18465511cfe3SLois Curfman McInnes Options Database Keys: 1847db81eaa0SLois Curfman McInnes + -mat_aij_no_inode - Do not use inodes 1848db81eaa0SLois Curfman McInnes . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1849db81eaa0SLois Curfman McInnes - -mat_aij_oneindex - Internally use indexing starting at 1 1850db81eaa0SLois Curfman McInnes rather than 0. Note that when calling MatSetValues(), 1851db81eaa0SLois Curfman McInnes the user still MUST index entries starting at 0! 1852494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1853494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 1854494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1855494eafd4SBarry Smith (defaults to using SeqAIJ format on one processor) 1856494eafd4SBarry Smith 18575511cfe3SLois Curfman McInnes 1858af1d9917SSatish Balay Example usage: 1859e0245417SLois Curfman McInnes 1860af1d9917SSatish Balay Consider the following 8x8 matrix with 34 non-zero values, that is 1861af1d9917SSatish Balay assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1862af1d9917SSatish Balay proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1863af1d9917SSatish Balay as follows: 18642191d07cSBarry Smith 1865db81eaa0SLois Curfman McInnes .vb 1866af1d9917SSatish Balay 1 2 0 | 0 3 0 | 0 4 1867af1d9917SSatish Balay Proc0 0 5 6 | 7 0 0 | 8 0 1868af1d9917SSatish Balay 9 0 10 | 11 0 0 | 12 0 1869af1d9917SSatish Balay ------------------------------------- 1870af1d9917SSatish Balay 13 0 14 | 15 16 17 | 0 0 1871af1d9917SSatish Balay Proc1 0 18 0 | 19 20 21 | 0 0 1872af1d9917SSatish Balay 0 0 0 | 22 23 0 | 24 0 1873af1d9917SSatish Balay ------------------------------------- 1874af1d9917SSatish Balay Proc2 25 26 27 | 0 0 28 | 29 0 1875af1d9917SSatish Balay 30 0 0 | 31 32 33 | 0 34 1876db81eaa0SLois Curfman McInnes .ve 1877b810aeb4SBarry Smith 1878af1d9917SSatish Balay This can be represented as a collection of submatrices as: 18795511cfe3SLois Curfman McInnes 1880af1d9917SSatish Balay .vb 1881af1d9917SSatish Balay A B C 1882af1d9917SSatish Balay D E F 1883af1d9917SSatish Balay G H I 1884af1d9917SSatish Balay .ve 1885af1d9917SSatish Balay 1886af1d9917SSatish Balay Where the submatrices A,B,C are owned by proc0, D,E,F are 1887af1d9917SSatish Balay owned by proc1, G,H,I are owned by proc2. 1888af1d9917SSatish Balay 1889af1d9917SSatish Balay The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1890af1d9917SSatish Balay The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1891af1d9917SSatish Balay The 'M','N' parameters are 8,8, and have the same values on all procs. 1892af1d9917SSatish Balay 1893af1d9917SSatish Balay The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1894af1d9917SSatish Balay submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1895af1d9917SSatish Balay corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1896af1d9917SSatish Balay Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1897af1d9917SSatish Balay part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1898af1d9917SSatish Balay matrix, ans [DF] as another SeqAIJ matrix. 1899af1d9917SSatish Balay 1900af1d9917SSatish Balay When d_nz, o_nz parameters are specified, d_nz storage elements are 1901af1d9917SSatish Balay allocated for every row of the local diagonal submatrix, and o_nz 1902af1d9917SSatish Balay storage locations are allocated for every row of the OFF-DIAGONAL submat. 1903af1d9917SSatish Balay One way to choose d_nz and o_nz is to use the max nonzerors per local 1904af1d9917SSatish Balay rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1905af1d9917SSatish Balay In this case, the values of d_nz,o_nz are: 1906af1d9917SSatish Balay .vb 1907af1d9917SSatish Balay proc0 : dnz = 2, o_nz = 2 1908af1d9917SSatish Balay proc1 : dnz = 3, o_nz = 2 1909af1d9917SSatish Balay proc2 : dnz = 1, o_nz = 4 1910af1d9917SSatish Balay .ve 1911af1d9917SSatish Balay We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1912af1d9917SSatish Balay translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1913af1d9917SSatish Balay for proc3. i.e we are using 12+15+10=37 storage locations to store 1914af1d9917SSatish Balay 34 values. 1915af1d9917SSatish Balay 1916af1d9917SSatish Balay When d_nnz, o_nnz parameters are specified, the storage is specified 1917af1d9917SSatish Balay for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1918af1d9917SSatish Balay In the above case the values for d_nnz,o_nnz are: 1919af1d9917SSatish Balay .vb 1920af1d9917SSatish Balay proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1921af1d9917SSatish Balay proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1922af1d9917SSatish Balay proc2: d_nnz = [1,1] and o_nnz = [4,4] 1923af1d9917SSatish Balay .ve 1924af1d9917SSatish Balay Here the space allocated is sum of all the above values i.e 34, and 1925af1d9917SSatish Balay hence pre-allocation is perfect. 19263a511b96SLois Curfman McInnes 1927027ccd11SLois Curfman McInnes Level: intermediate 1928027ccd11SLois Curfman McInnes 1929dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1930ff756334SLois Curfman McInnes 1931fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 19328a729477SBarry Smith @*/ 1933e1311b90SBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 19348a729477SBarry Smith { 193544cd7ae7SLois Curfman McInnes Mat B; 193644cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 1937eac7125bSBarry Smith int ierr, i,size,flag1 = 0, flag2 = 0; 1938416022c9SBarry Smith 19393a40ed3dSBarry Smith PetscFunctionBegin; 19403914022bSBarry Smith MPI_Comm_size(comm,&size); 19417be0e774SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr); 19427be0e774SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 19437be0e774SBarry Smith if (!flag1 && !flag2 && size == 1) { 19443914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 19453914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 19463914022bSBarry Smith ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 19473a40ed3dSBarry Smith PetscFunctionReturn(0); 19483914022bSBarry Smith } 19493914022bSBarry Smith 195044cd7ae7SLois Curfman McInnes *A = 0; 19513f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 195244cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 195344cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 195444cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1955cda55fadSBarry Smith PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1956e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIAIJ; 1957e1311b90SBarry Smith B->ops->view = MatView_MPIAIJ; 195844cd7ae7SLois Curfman McInnes B->factor = 0; 195944cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 196090f02eecSBarry Smith B->mapping = 0; 1961d6dfbf8fSBarry Smith 196247794344SBarry Smith B->insertmode = NOT_SET_VALUES; 19639eb4d147SSatish Balay b->size = size; 196444cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 19651eb62cbbSBarry Smith 19663a40ed3dSBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1967a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 19683a40ed3dSBarry Smith } 19691987afe7SBarry Smith 1970eac7125bSBarry Smith ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1971eac7125bSBarry Smith ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 197244cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 197344cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 197444cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 197544cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 19761eb62cbbSBarry Smith 1977c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 1978c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 1979253a16ddSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1980253a16ddSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1981c7fcc2eaSBarry Smith 19821eb62cbbSBarry Smith /* build local table of row and column ownerships */ 198344cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1984f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1985603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 1986ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 198744cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 198844cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 198944cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 19908a729477SBarry Smith } 199144cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 199244cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 1993ca161407SBarry Smith ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 199444cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 199544cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 199644cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 19978a729477SBarry Smith } 199844cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 199944cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 20008a729477SBarry Smith 20015ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 2002029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 200344cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 20047b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 2005029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 200644cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 20078a729477SBarry Smith 20081eb62cbbSBarry Smith /* build cache for off array entries formed */ 2009*bc5ccf88SSatish Balay ierr = StashCreate_Private(comm,1,&b->stash); CHKERRQ(ierr); 201090f02eecSBarry Smith b->donotstash = 0; 201144cd7ae7SLois Curfman McInnes b->colmap = 0; 201244cd7ae7SLois Curfman McInnes b->garray = 0; 201344cd7ae7SLois Curfman McInnes b->roworiented = 1; 20148a729477SBarry Smith 20151eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 201644cd7ae7SLois Curfman McInnes b->lvec = 0; 201744cd7ae7SLois Curfman McInnes b->Mvctx = 0; 20188a729477SBarry Smith 20197a0afa10SBarry Smith /* stuff for MatGetRow() */ 202044cd7ae7SLois Curfman McInnes b->rowindices = 0; 202144cd7ae7SLois Curfman McInnes b->rowvalues = 0; 202244cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 20237a0afa10SBarry Smith 20242e8a6d31SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 20252e8a6d31SBarry Smith "MatStoreValues_MPIAIJ", 20262e8a6d31SBarry Smith (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 20272e8a6d31SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 20282e8a6d31SBarry Smith "MatRetrieveValues_MPIAIJ", 20292e8a6d31SBarry Smith (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 20305ef9f2a5SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 20315ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIAIJ", 20325ef9f2a5SBarry Smith (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 203344cd7ae7SLois Curfman McInnes *A = B; 20343a40ed3dSBarry Smith PetscFunctionReturn(0); 2035d6dfbf8fSBarry Smith } 2036c74985f6SBarry Smith 20375615d1e5SSatish Balay #undef __FUNC__ 20382e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIAIJ" 20392e8a6d31SBarry Smith int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2040d6dfbf8fSBarry Smith { 2041d6dfbf8fSBarry Smith Mat mat; 2042416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 2043a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 2044d6dfbf8fSBarry Smith 20453a40ed3dSBarry Smith PetscFunctionBegin; 2046416022c9SBarry Smith *newmat = 0; 20473f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 2048a5a9c739SBarry Smith PLogObjectCreate(mat); 20490452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 2050cda55fadSBarry Smith PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2051e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIAIJ; 2052e1311b90SBarry Smith mat->ops->view = MatView_MPIAIJ; 2053d6dfbf8fSBarry Smith mat->factor = matin->factor; 2054c456f294SBarry Smith mat->assembled = PETSC_TRUE; 2055e7641de0SSatish Balay mat->insertmode = NOT_SET_VALUES; 2056d6dfbf8fSBarry Smith 205744cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 205844cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 205944cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 206044cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 2061d6dfbf8fSBarry Smith 2062416022c9SBarry Smith a->rstart = oldmat->rstart; 2063416022c9SBarry Smith a->rend = oldmat->rend; 2064416022c9SBarry Smith a->cstart = oldmat->cstart; 2065416022c9SBarry Smith a->cend = oldmat->cend; 206617699dbbSLois Curfman McInnes a->size = oldmat->size; 206717699dbbSLois Curfman McInnes a->rank = oldmat->rank; 2068e7641de0SSatish Balay a->donotstash = oldmat->donotstash; 2069e7641de0SSatish Balay a->roworiented = oldmat->roworiented; 2070e7641de0SSatish Balay a->rowindices = 0; 2071bcd2baecSBarry Smith a->rowvalues = 0; 2072bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 2073d6dfbf8fSBarry Smith 2074603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2075f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 2076603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 2077603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2078*bc5ccf88SSatish Balay ierr = StashCreate_Private(matin->comm,1,&a->stash); CHKERRQ(ierr); 20792ee70a88SLois Curfman McInnes if (oldmat->colmap) { 2080b1fc9764SSatish Balay #if defined (USE_CTABLE) 2081fa46199cSSatish Balay ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 2082b1fc9764SSatish Balay #else 20830452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 2084416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 2085416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 2086b1fc9764SSatish Balay #endif 2087416022c9SBarry Smith } else a->colmap = 0; 20883f41c07dSBarry Smith if (oldmat->garray) { 20893f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 20903f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 2091464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 20923f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2093416022c9SBarry Smith } else a->garray = 0; 2094d6dfbf8fSBarry Smith 2095416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2096416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 2097a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2098416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 20992e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2100416022c9SBarry Smith PLogObjectParent(mat,a->A); 21012e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2102416022c9SBarry Smith PLogObjectParent(mat,a->B); 21035dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 210425cdf11fSBarry Smith if (flg) { 2105682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2106682d7d0cSBarry Smith } 210727508adbSBarry Smith ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 21088a729477SBarry Smith *newmat = mat; 21093a40ed3dSBarry Smith PetscFunctionReturn(0); 21108a729477SBarry Smith } 2111416022c9SBarry Smith 211277c4ece6SBarry Smith #include "sys.h" 2113416022c9SBarry Smith 21145615d1e5SSatish Balay #undef __FUNC__ 21155615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 211619bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 2117416022c9SBarry Smith { 2118d65a2f8fSBarry Smith Mat A; 2119d65a2f8fSBarry Smith Scalar *vals,*svals; 212019bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 2121416022c9SBarry Smith MPI_Status status; 21223a40ed3dSBarry Smith int i, nz, ierr, j,rstart, rend, fd; 212317699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 2124d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 2125b362ba68SBarry Smith int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 2126416022c9SBarry Smith 21273a40ed3dSBarry Smith PetscFunctionBegin; 212817699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 212917699dbbSLois Curfman McInnes if (!rank) { 213090ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 21310752156aSBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2132a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2133d64ed03dSBarry Smith if (header[3] < 0) { 2134a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 2135d64ed03dSBarry Smith } 21366c5fab8fSBarry Smith } 21376c5fab8fSBarry Smith 2138ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2139416022c9SBarry Smith M = header[1]; N = header[2]; 2140416022c9SBarry Smith /* determine ownership of all rows */ 214117699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 21420452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 2143ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2144416022c9SBarry Smith rowners[0] = 0; 214517699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 2146416022c9SBarry Smith rowners[i] += rowners[i-1]; 2147416022c9SBarry Smith } 214817699dbbSLois Curfman McInnes rstart = rowners[rank]; 214917699dbbSLois Curfman McInnes rend = rowners[rank+1]; 2150416022c9SBarry Smith 2151416022c9SBarry Smith /* distribute row lengths to all processors */ 21520452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 2153416022c9SBarry Smith offlens = ourlens + (rend-rstart); 215417699dbbSLois Curfman McInnes if (!rank) { 21550452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 21560752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 21570452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 215817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2159ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 21600452661fSBarry Smith PetscFree(sndcounts); 21613a40ed3dSBarry Smith } else { 2162ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2163416022c9SBarry Smith } 2164416022c9SBarry Smith 216517699dbbSLois Curfman McInnes if (!rank) { 2166416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 21670452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2168cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 216917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2170416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2171416022c9SBarry Smith procsnz[i] += rowlengths[j]; 2172416022c9SBarry Smith } 2173416022c9SBarry Smith } 21740452661fSBarry Smith PetscFree(rowlengths); 2175416022c9SBarry Smith 2176416022c9SBarry Smith /* determine max buffer needed and allocate it */ 2177416022c9SBarry Smith maxnz = 0; 217817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 21790452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 2180416022c9SBarry Smith } 21810452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2182416022c9SBarry Smith 2183416022c9SBarry Smith /* read in my part of the matrix column indices */ 2184416022c9SBarry Smith nz = procsnz[0]; 21850452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 21860752156aSBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2187d65a2f8fSBarry Smith 2188d65a2f8fSBarry Smith /* read in every one elses and ship off */ 218917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 2190d65a2f8fSBarry Smith nz = procsnz[i]; 21910752156aSBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2192ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2193d65a2f8fSBarry Smith } 21940452661fSBarry Smith PetscFree(cols); 21953a40ed3dSBarry Smith } else { 2196416022c9SBarry Smith /* determine buffer space needed for message */ 2197416022c9SBarry Smith nz = 0; 2198416022c9SBarry Smith for ( i=0; i<m; i++ ) { 2199416022c9SBarry Smith nz += ourlens[i]; 2200416022c9SBarry Smith } 22010452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2202416022c9SBarry Smith 2203416022c9SBarry Smith /* receive message of column indices*/ 2204ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2205ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2206a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2207416022c9SBarry Smith } 2208416022c9SBarry Smith 2209b362ba68SBarry Smith /* determine column ownership if matrix is not square */ 2210b362ba68SBarry Smith if (N != M) { 2211b362ba68SBarry Smith n = N/size + ((N % size) > rank); 2212b362ba68SBarry Smith ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2213b362ba68SBarry Smith cstart = cend - n; 2214b362ba68SBarry Smith } else { 2215b362ba68SBarry Smith cstart = rstart; 2216b362ba68SBarry Smith cend = rend; 2217fb2e594dSBarry Smith n = cend - cstart; 2218b362ba68SBarry Smith } 2219b362ba68SBarry Smith 2220416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 2221cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 2222416022c9SBarry Smith jj = 0; 2223416022c9SBarry Smith for ( i=0; i<m; i++ ) { 2224416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 2225b362ba68SBarry Smith if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2226416022c9SBarry Smith jj++; 2227416022c9SBarry Smith } 2228416022c9SBarry Smith } 2229d65a2f8fSBarry Smith 2230d65a2f8fSBarry Smith /* create our matrix */ 2231416022c9SBarry Smith for ( i=0; i<m; i++ ) { 2232416022c9SBarry Smith ourlens[i] -= offlens[i]; 2233416022c9SBarry Smith } 2234b362ba68SBarry Smith ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2235d65a2f8fSBarry Smith A = *newmat; 2236fb2e594dSBarry Smith ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr); 2237d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 2238d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 2239d65a2f8fSBarry Smith } 2240416022c9SBarry Smith 224117699dbbSLois Curfman McInnes if (!rank) { 22420452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 2243416022c9SBarry Smith 2244416022c9SBarry Smith /* read in my part of the matrix numerical values */ 2245416022c9SBarry Smith nz = procsnz[0]; 22460752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2247d65a2f8fSBarry Smith 2248d65a2f8fSBarry Smith /* insert into matrix */ 2249d65a2f8fSBarry Smith jj = rstart; 2250d65a2f8fSBarry Smith smycols = mycols; 2251d65a2f8fSBarry Smith svals = vals; 2252d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 2253d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2254d65a2f8fSBarry Smith smycols += ourlens[i]; 2255d65a2f8fSBarry Smith svals += ourlens[i]; 2256d65a2f8fSBarry Smith jj++; 2257416022c9SBarry Smith } 2258416022c9SBarry Smith 2259d65a2f8fSBarry Smith /* read in other processors and ship out */ 226017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 2261416022c9SBarry Smith nz = procsnz[i]; 22620752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2263ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2264416022c9SBarry Smith } 22650452661fSBarry Smith PetscFree(procsnz); 22663a40ed3dSBarry Smith } else { 2267d65a2f8fSBarry Smith /* receive numeric values */ 22680452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2269416022c9SBarry Smith 2270d65a2f8fSBarry Smith /* receive message of values*/ 2271ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2272ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2273a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2274d65a2f8fSBarry Smith 2275d65a2f8fSBarry Smith /* insert into matrix */ 2276d65a2f8fSBarry Smith jj = rstart; 2277d65a2f8fSBarry Smith smycols = mycols; 2278d65a2f8fSBarry Smith svals = vals; 2279d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 2280d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2281d65a2f8fSBarry Smith smycols += ourlens[i]; 2282d65a2f8fSBarry Smith svals += ourlens[i]; 2283d65a2f8fSBarry Smith jj++; 2284d65a2f8fSBarry Smith } 2285d65a2f8fSBarry Smith } 22860452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 2287d65a2f8fSBarry Smith 22886d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 22896d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 22903a40ed3dSBarry Smith PetscFunctionReturn(0); 2291416022c9SBarry Smith } 2292a0ff6018SBarry Smith 229329da9460SBarry Smith #undef __FUNC__ 229429da9460SBarry Smith #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2295a0ff6018SBarry Smith /* 229629da9460SBarry Smith Not great since it makes two copies of the submatrix, first an SeqAIJ 229729da9460SBarry Smith in local and then by concatenating the local matrices the end result. 229829da9460SBarry Smith Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2299a0ff6018SBarry Smith */ 23007b2a1423SBarry Smith int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2301a0ff6018SBarry Smith { 230200e6dbe6SBarry Smith int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2303fee21e36SBarry Smith Mat *local,M, Mreuse; 230400e6dbe6SBarry Smith Scalar *vwork,*aa; 230500e6dbe6SBarry Smith MPI_Comm comm = mat->comm; 230600e6dbe6SBarry Smith Mat_SeqAIJ *aij; 230700e6dbe6SBarry Smith int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2308a0ff6018SBarry Smith 2309a0ff6018SBarry Smith PetscFunctionBegin; 231000e6dbe6SBarry Smith MPI_Comm_rank(comm,&rank); 231100e6dbe6SBarry Smith MPI_Comm_size(comm,&size); 231200e6dbe6SBarry Smith 2313fee21e36SBarry Smith if (call == MAT_REUSE_MATRIX) { 2314fee21e36SBarry Smith ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2315fee21e36SBarry Smith if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2316fee21e36SBarry Smith local = &Mreuse; 2317fee21e36SBarry Smith ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2318fee21e36SBarry Smith } else { 2319a0ff6018SBarry Smith ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2320fee21e36SBarry Smith Mreuse = *local; 2321fee21e36SBarry Smith PetscFree(local); 2322fee21e36SBarry Smith } 2323a0ff6018SBarry Smith 2324a0ff6018SBarry Smith /* 2325a0ff6018SBarry Smith m - number of local rows 2326a0ff6018SBarry Smith n - number of columns (same on all processors) 2327a0ff6018SBarry Smith rstart - first row in new global matrix generated 2328a0ff6018SBarry Smith */ 2329fee21e36SBarry Smith ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2330a0ff6018SBarry Smith if (call == MAT_INITIAL_MATRIX) { 2331fee21e36SBarry Smith aij = (Mat_SeqAIJ *) (Mreuse)->data; 2332a8c6a408SBarry Smith if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 233300e6dbe6SBarry Smith ii = aij->i; 233400e6dbe6SBarry Smith jj = aij->j; 233500e6dbe6SBarry Smith 2336a0ff6018SBarry Smith /* 233700e6dbe6SBarry Smith Determine the number of non-zeros in the diagonal and off-diagonal 233800e6dbe6SBarry Smith portions of the matrix in order to do correct preallocation 2339a0ff6018SBarry Smith */ 234000e6dbe6SBarry Smith 234100e6dbe6SBarry Smith /* first get start and end of "diagonal" columns */ 23426a6a5d1dSBarry Smith if (csize == PETSC_DECIDE) { 234300e6dbe6SBarry Smith nlocal = n/size + ((n % size) > rank); 23446a6a5d1dSBarry Smith } else { 23456a6a5d1dSBarry Smith nlocal = csize; 23466a6a5d1dSBarry Smith } 2347ca161407SBarry Smith ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 234800e6dbe6SBarry Smith rstart = rend - nlocal; 23496a6a5d1dSBarry Smith if (rank == size - 1 && rend != n) { 23506a6a5d1dSBarry Smith SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 23516a6a5d1dSBarry Smith } 235200e6dbe6SBarry Smith 235300e6dbe6SBarry Smith /* next, compute all the lengths */ 235400e6dbe6SBarry Smith dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 235500e6dbe6SBarry Smith olens = dlens + m; 235600e6dbe6SBarry Smith for ( i=0; i<m; i++ ) { 235700e6dbe6SBarry Smith jend = ii[i+1] - ii[i]; 235800e6dbe6SBarry Smith olen = 0; 235900e6dbe6SBarry Smith dlen = 0; 236000e6dbe6SBarry Smith for ( j=0; j<jend; j++ ) { 236100e6dbe6SBarry Smith if ( *jj < rstart || *jj >= rend) olen++; 236200e6dbe6SBarry Smith else dlen++; 236300e6dbe6SBarry Smith jj++; 236400e6dbe6SBarry Smith } 236500e6dbe6SBarry Smith olens[i] = olen; 236600e6dbe6SBarry Smith dlens[i] = dlen; 236700e6dbe6SBarry Smith } 23682d207970SBarry Smith ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 236900e6dbe6SBarry Smith PetscFree(dlens); 2370a0ff6018SBarry Smith } else { 2371a0ff6018SBarry Smith int ml,nl; 2372a0ff6018SBarry Smith 2373a0ff6018SBarry Smith M = *newmat; 2374a0ff6018SBarry Smith ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2375a8c6a408SBarry Smith if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2376a0ff6018SBarry Smith ierr = MatZeroEntries(M);CHKERRQ(ierr); 2377c48de900SBarry Smith /* 2378c48de900SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2379c48de900SBarry Smith rather than the slower MatSetValues(). 2380c48de900SBarry Smith */ 2381c48de900SBarry Smith M->was_assembled = PETSC_TRUE; 2382c48de900SBarry Smith M->assembled = PETSC_FALSE; 2383a0ff6018SBarry Smith } 2384a0ff6018SBarry Smith ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2385fee21e36SBarry Smith aij = (Mat_SeqAIJ *) (Mreuse)->data; 2386a8c6a408SBarry Smith if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 238700e6dbe6SBarry Smith ii = aij->i; 238800e6dbe6SBarry Smith jj = aij->j; 238900e6dbe6SBarry Smith aa = aij->a; 2390a0ff6018SBarry Smith for (i=0; i<m; i++) { 2391a0ff6018SBarry Smith row = rstart + i; 239200e6dbe6SBarry Smith nz = ii[i+1] - ii[i]; 239300e6dbe6SBarry Smith cwork = jj; jj += nz; 239400e6dbe6SBarry Smith vwork = aa; aa += nz; 23958c638d02SBarry Smith ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2396a0ff6018SBarry Smith } 2397a0ff6018SBarry Smith 2398a0ff6018SBarry Smith ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2399a0ff6018SBarry Smith ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2400a0ff6018SBarry Smith *newmat = M; 2401fee21e36SBarry Smith 2402fee21e36SBarry Smith /* save submatrix used in processor for next request */ 2403fee21e36SBarry Smith if (call == MAT_INITIAL_MATRIX) { 2404fee21e36SBarry Smith ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2405fee21e36SBarry Smith ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2406fee21e36SBarry Smith } 2407fee21e36SBarry Smith 2408a0ff6018SBarry Smith PetscFunctionReturn(0); 2409a0ff6018SBarry Smith } 2410