1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*83e2fdc7SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.210 1997/07/16 22:00:13 balay Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 8d9942c19SSatish Balay #include "src/inline/spops.h" 98a729477SBarry Smith 109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 129e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 139e25ed09SBarry Smith length of colmap equals the global matrix length. 149e25ed09SBarry Smith */ 155615d1e5SSatish Balay #undef __FUNC__ 165eea60f9SBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */ 170a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 189e25ed09SBarry Smith { 1944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21905e6a2fSBarry Smith int n = B->n,i; 22dbb450caSBarry Smith 23758f045eSSatish Balay aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 24464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 25cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 279e25ed09SBarry Smith return 0; 289e25ed09SBarry Smith } 299e25ed09SBarry Smith 302493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 312493cbb0SBarry Smith 325615d1e5SSatish Balay #undef __FUNC__ 335615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ" 348f6be9afSLois Curfman McInnes int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 353b2fbd54SBarry Smith PetscTruth *done) 36299609e3SLois Curfman McInnes { 37299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 38299609e3SLois Curfman McInnes int ierr; 3917699dbbSLois Curfman McInnes if (aij->size == 1) { 403b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 41e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 423b2fbd54SBarry Smith return 0; 433b2fbd54SBarry Smith } 443b2fbd54SBarry Smith 455615d1e5SSatish Balay #undef __FUNC__ 465eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */ 478f6be9afSLois Curfman McInnes int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 483b2fbd54SBarry Smith PetscTruth *done) 493b2fbd54SBarry Smith { 503b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 513b2fbd54SBarry Smith int ierr; 523b2fbd54SBarry Smith if (aij->size == 1) { 533b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 55299609e3SLois Curfman McInnes return 0; 56299609e3SLois Curfman McInnes } 57299609e3SLois Curfman McInnes 580520107fSSatish Balay #define CHUNKSIZE 15 5930770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 600520107fSSatish Balay { \ 610520107fSSatish Balay \ 620520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 6330770e4dSSatish Balay rmax = aimax[row]; nrow = ailen[row]; \ 64f5e9677aSSatish Balay col1 = col - shift; \ 65f5e9677aSSatish Balay \ 66ba4e3ef2SSatish Balay low = 0; high = nrow; \ 67ba4e3ef2SSatish Balay while (high-low > 5) { \ 68ba4e3ef2SSatish Balay t = (low+high)/2; \ 69ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 70ba4e3ef2SSatish Balay else low = t; \ 71ba4e3ef2SSatish Balay } \ 720520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 73f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 74f5e9677aSSatish Balay if (rp[_i] == col1) { \ 750520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 760520107fSSatish Balay else ap[_i] = value; \ 7730770e4dSSatish Balay goto a_noinsert; \ 780520107fSSatish Balay } \ 790520107fSSatish Balay } \ 8089280ab3SLois Curfman McInnes if (nonew == 1) goto a_noinsert; \ 8111ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 820520107fSSatish Balay if (nrow >= rmax) { \ 830520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 840520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 850520107fSSatish Balay Scalar *new_a; \ 860520107fSSatish Balay \ 8711ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 8889280ab3SLois Curfman McInnes \ 890520107fSSatish Balay /* malloc new storage space */ \ 900520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 910520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 920520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 930520107fSSatish Balay new_i = new_j + new_nz; \ 940520107fSSatish Balay \ 950520107fSSatish Balay /* copy over old data into new slots */ \ 960520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 970520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 980520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 990520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 1000520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 1010520107fSSatish Balay len*sizeof(int)); \ 1020520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 1030520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 1040520107fSSatish Balay len*sizeof(Scalar)); \ 1050520107fSSatish Balay /* free up old matrix storage */ \ 106f5e9677aSSatish Balay \ 1070520107fSSatish Balay PetscFree(a->a); \ 1080520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 1090520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1100520107fSSatish Balay a->singlemalloc = 1; \ 1110520107fSSatish Balay \ 1120520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 11330770e4dSSatish Balay rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 1140520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 1150520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 1160520107fSSatish Balay a->reallocs++; \ 1170520107fSSatish Balay } \ 1180520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 1190520107fSSatish Balay /* shift up all the later entries in this row */ \ 1200520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 1210520107fSSatish Balay rp[ii+1] = rp[ii]; \ 1220520107fSSatish Balay ap[ii+1] = ap[ii]; \ 1230520107fSSatish Balay } \ 124f5e9677aSSatish Balay rp[_i] = col1; \ 1250520107fSSatish Balay ap[_i] = value; \ 12630770e4dSSatish Balay a_noinsert: ; \ 1270520107fSSatish Balay ailen[row] = nrow; \ 1280520107fSSatish Balay } 1290a198c4cSBarry Smith 13030770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 13130770e4dSSatish Balay { \ 13230770e4dSSatish Balay \ 13330770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 13430770e4dSSatish Balay rmax = bimax[row]; nrow = bilen[row]; \ 13530770e4dSSatish Balay col1 = col - shift; \ 13630770e4dSSatish Balay \ 137ba4e3ef2SSatish Balay low = 0; high = nrow; \ 138ba4e3ef2SSatish Balay while (high-low > 5) { \ 139ba4e3ef2SSatish Balay t = (low+high)/2; \ 140ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 141ba4e3ef2SSatish Balay else low = t; \ 142ba4e3ef2SSatish Balay } \ 14330770e4dSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 14430770e4dSSatish Balay if (rp[_i] > col1) break; \ 14530770e4dSSatish Balay if (rp[_i] == col1) { \ 14630770e4dSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 14730770e4dSSatish Balay else ap[_i] = value; \ 14830770e4dSSatish Balay goto b_noinsert; \ 14930770e4dSSatish Balay } \ 15030770e4dSSatish Balay } \ 15189280ab3SLois Curfman McInnes if (nonew == 1) goto b_noinsert; \ 15211ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 15330770e4dSSatish Balay if (nrow >= rmax) { \ 15430770e4dSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15574c639caSSatish Balay int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 15630770e4dSSatish Balay Scalar *new_a; \ 15730770e4dSSatish Balay \ 15811ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 15989280ab3SLois Curfman McInnes \ 16030770e4dSSatish Balay /* malloc new storage space */ \ 16174c639caSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 16230770e4dSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 16330770e4dSSatish Balay new_j = (int *) (new_a + new_nz); \ 16430770e4dSSatish Balay new_i = new_j + new_nz; \ 16530770e4dSSatish Balay \ 16630770e4dSSatish Balay /* copy over old data into new slots */ \ 16730770e4dSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 16874c639caSSatish Balay for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 16930770e4dSSatish Balay PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 17030770e4dSSatish Balay len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 17130770e4dSSatish Balay PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 17230770e4dSSatish Balay len*sizeof(int)); \ 17330770e4dSSatish Balay PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 17430770e4dSSatish Balay PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 17530770e4dSSatish Balay len*sizeof(Scalar)); \ 17630770e4dSSatish Balay /* free up old matrix storage */ \ 17730770e4dSSatish Balay \ 17874c639caSSatish Balay PetscFree(b->a); \ 17974c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 18074c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 18174c639caSSatish Balay b->singlemalloc = 1; \ 18230770e4dSSatish Balay \ 18330770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 18430770e4dSSatish Balay rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 18574c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 18674c639caSSatish Balay b->maxnz += CHUNKSIZE; \ 18774c639caSSatish Balay b->reallocs++; \ 18830770e4dSSatish Balay } \ 18974c639caSSatish Balay N = nrow++ - 1; b->nz++; \ 19030770e4dSSatish Balay /* shift up all the later entries in this row */ \ 19130770e4dSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 19230770e4dSSatish Balay rp[ii+1] = rp[ii]; \ 19330770e4dSSatish Balay ap[ii+1] = ap[ii]; \ 19430770e4dSSatish Balay } \ 19530770e4dSSatish Balay rp[_i] = col1; \ 19630770e4dSSatish Balay ap[_i] = value; \ 19730770e4dSSatish Balay b_noinsert: ; \ 19830770e4dSSatish Balay bilen[row] = nrow; \ 19930770e4dSSatish Balay } 20030770e4dSSatish Balay 2010520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 2025615d1e5SSatish Balay #undef __FUNC__ 2035615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 2048f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 2058a729477SBarry Smith { 20644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2074b0e389bSBarry Smith Scalar value; 2081eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 2091eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 210905e6a2fSBarry Smith int roworiented = aij->roworiented; 2118a729477SBarry Smith 2120520107fSSatish Balay /* Some Variables required in the macro */ 2134ee7247eSSatish Balay Mat A = aij->A; 2144ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 21530770e4dSSatish Balay int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 21630770e4dSSatish Balay Scalar *aa = a->a; 21730770e4dSSatish Balay 21830770e4dSSatish Balay Mat B = aij->B; 21930770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 22030770e4dSSatish Balay int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 22130770e4dSSatish Balay Scalar *ba = b->a; 22230770e4dSSatish Balay 223ba4e3ef2SSatish Balay int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 22430770e4dSSatish Balay int nonew = a->nonew,shift = a->indexshift; 22530770e4dSSatish Balay Scalar *ap; 2264ee7247eSSatish Balay 2278a729477SBarry Smith for ( i=0; i<m; i++ ) { 2280a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 229e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 230e3372554SBarry Smith if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 2310a198c4cSBarry Smith #endif 2324b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 2334b0e389bSBarry Smith row = im[i] - rstart; 2341eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 2354b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 2364b0e389bSBarry Smith col = in[j] - cstart; 2374b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 23830770e4dSSatish Balay MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 2390520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2401eb62cbbSBarry Smith } 2410a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 242e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 243e3372554SBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 2440a198c4cSBarry Smith #endif 2451eb62cbbSBarry Smith else { 246227d817aSBarry Smith if (mat->was_assembled) { 247905e6a2fSBarry Smith if (!aij->colmap) { 248905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 249905e6a2fSBarry Smith } 250905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 251ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2522493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2534b0e389bSBarry Smith col = in[j]; 2549bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 255f9508a3cSSatish Balay B = aij->B; 256f9508a3cSSatish Balay b = (Mat_SeqAIJ *) B->data; 257f9508a3cSSatish Balay bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 258f9508a3cSSatish Balay ba = b->a; 259d6dfbf8fSBarry Smith } 2609e25ed09SBarry Smith } 2614b0e389bSBarry Smith else col = in[j]; 2624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 26330770e4dSSatish Balay MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 26430770e4dSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2651eb62cbbSBarry Smith } 2661eb62cbbSBarry Smith } 2671eb62cbbSBarry Smith } 2681eb62cbbSBarry Smith else { 26990f02eecSBarry Smith if (roworiented && !aij->donotstash) { 2704b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 2714b0e389bSBarry Smith } 2724b0e389bSBarry Smith else { 27390f02eecSBarry Smith if (!aij->donotstash) { 2744b0e389bSBarry Smith row = im[i]; 2754b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 2764b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 2774b0e389bSBarry Smith } 2784b0e389bSBarry Smith } 2791eb62cbbSBarry Smith } 2808a729477SBarry Smith } 28190f02eecSBarry Smith } 2828a729477SBarry Smith return 0; 2838a729477SBarry Smith } 2848a729477SBarry Smith 2855615d1e5SSatish Balay #undef __FUNC__ 2865615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 2878f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 288b49de8d1SLois Curfman McInnes { 289b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 290b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 291b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 292b49de8d1SLois Curfman McInnes 293b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 294e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 295e3372554SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 296b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 297b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 298b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 299e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 300e3372554SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 301b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 302b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 303b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 304b49de8d1SLois Curfman McInnes } 305b49de8d1SLois Curfman McInnes else { 306905e6a2fSBarry Smith if (!aij->colmap) { 307905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 308905e6a2fSBarry Smith } 309905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 310e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311d9d09a02SSatish Balay else { 312b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 313b49de8d1SLois Curfman McInnes } 314b49de8d1SLois Curfman McInnes } 315b49de8d1SLois Curfman McInnes } 316d9d09a02SSatish Balay } 317b49de8d1SLois Curfman McInnes else { 318e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 319b49de8d1SLois Curfman McInnes } 320b49de8d1SLois Curfman McInnes } 321b49de8d1SLois Curfman McInnes return 0; 322b49de8d1SLois Curfman McInnes } 323b49de8d1SLois Curfman McInnes 3245615d1e5SSatish Balay #undef __FUNC__ 3255615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 3268f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 3278a729477SBarry Smith { 32844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 329d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 33017699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 33117699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 3321eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3336abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 3341eb62cbbSBarry Smith InsertMode addv; 3351eb62cbbSBarry Smith Scalar *rvalues,*svalues; 3361eb62cbbSBarry Smith 3371eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 33847794344SBarry Smith MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 339dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 340e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 3411eb62cbbSBarry Smith } 34247794344SBarry Smith mat->insertmode = addv; /* in case this processor had no cache */ 3431eb62cbbSBarry Smith 3441eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3450452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 346cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3470452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 3481eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3491eb62cbbSBarry Smith idx = aij->stash.idx[i]; 35017699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3511eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3521eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 3538a729477SBarry Smith } 3548a729477SBarry Smith } 3558a729477SBarry Smith } 35617699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3571eb62cbbSBarry Smith 3581eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3590452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 36017699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 36117699dbbSLois Curfman McInnes nreceives = work[rank]; 36217699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 36317699dbbSLois Curfman McInnes nmax = work[rank]; 3640452661fSBarry Smith PetscFree(work); 3651eb62cbbSBarry Smith 3661eb62cbbSBarry Smith /* post receives: 3671eb62cbbSBarry Smith 1) each message will consist of ordered pairs 3681eb62cbbSBarry Smith (global index,value) we store the global index as a double 369d6dfbf8fSBarry Smith to simplify the message passing. 3701eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 3711eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 3721eb62cbbSBarry Smith this is a lot of wasted space. 3731eb62cbbSBarry Smith 3741eb62cbbSBarry Smith 3751eb62cbbSBarry Smith This could be done better. 3761eb62cbbSBarry Smith */ 3770452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 37878b31e54SBarry Smith CHKPTRQ(rvalues); 3790452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 38078b31e54SBarry Smith CHKPTRQ(recv_waits); 3811eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 382416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 3831eb62cbbSBarry Smith comm,recv_waits+i); 3841eb62cbbSBarry Smith } 3851eb62cbbSBarry Smith 3861eb62cbbSBarry Smith /* do sends: 3871eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3881eb62cbbSBarry Smith the ith processor 3891eb62cbbSBarry Smith */ 3900452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 3910452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 39278b31e54SBarry Smith CHKPTRQ(send_waits); 3930452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3941eb62cbbSBarry Smith starts[0] = 0; 39517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3961eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3971eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 3981eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 3991eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 4001eb62cbbSBarry Smith } 4010452661fSBarry Smith PetscFree(owner); 4021eb62cbbSBarry Smith starts[0] = 0; 40317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4041eb62cbbSBarry Smith count = 0; 40517699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 4061eb62cbbSBarry Smith if (procs[i]) { 407416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 4081eb62cbbSBarry Smith comm,send_waits+count++); 4091eb62cbbSBarry Smith } 4101eb62cbbSBarry Smith } 4110452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 4121eb62cbbSBarry Smith 4131eb62cbbSBarry Smith /* Free cache space */ 41455908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 41578b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 4161eb62cbbSBarry Smith 4171eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 4181eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 4191eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 4201eb62cbbSBarry Smith aij->rmax = nmax; 4211eb62cbbSBarry Smith 4221eb62cbbSBarry Smith return 0; 4231eb62cbbSBarry Smith } 42444a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 4251eb62cbbSBarry Smith 4265615d1e5SSatish Balay #undef __FUNC__ 4275615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 4288f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 4291eb62cbbSBarry Smith { 43044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4311eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 432416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 433905e6a2fSBarry Smith int row,col,other_disassembled; 4341eb62cbbSBarry Smith Scalar *values,val; 43547794344SBarry Smith InsertMode addv = mat->insertmode; 4361eb62cbbSBarry Smith 4371eb62cbbSBarry Smith /* wait on receives */ 4381eb62cbbSBarry Smith while (count) { 439d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 4401eb62cbbSBarry Smith /* unpack receives into our local space */ 441d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 442c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 4431eb62cbbSBarry Smith n = n/3; 4441eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 445227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 446227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 4471eb62cbbSBarry Smith val = values[3*i+2]; 4481eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 4491eb62cbbSBarry Smith col -= aij->cstart; 4506fd7127cSSatish Balay ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4511eb62cbbSBarry Smith } 4521eb62cbbSBarry Smith else { 45355a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 454905e6a2fSBarry Smith if (!aij->colmap) { 455905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 456905e6a2fSBarry Smith } 457905e6a2fSBarry Smith col = aij->colmap[col] - 1; 458ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4592493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 461d6dfbf8fSBarry Smith } 4629e25ed09SBarry Smith } 4636fd7127cSSatish Balay ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4641eb62cbbSBarry Smith } 4651eb62cbbSBarry Smith } 4661eb62cbbSBarry Smith count--; 4671eb62cbbSBarry Smith } 4680452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 4691eb62cbbSBarry Smith 4701eb62cbbSBarry Smith /* wait on sends */ 4711eb62cbbSBarry Smith if (aij->nsends) { 4720a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 4731eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 4740452661fSBarry Smith PetscFree(send_status); 4751eb62cbbSBarry Smith } 4760452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 4771eb62cbbSBarry Smith 47878b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 47978b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 4801eb62cbbSBarry Smith 4812493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 4822493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 483227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 484227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 4852493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 4862493cbb0SBarry Smith } 4872493cbb0SBarry Smith 4886d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 48978b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 4905e42470aSBarry Smith } 49178b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 49278b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 4935e42470aSBarry Smith 4947a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 4958a729477SBarry Smith return 0; 4968a729477SBarry Smith } 4978a729477SBarry Smith 4985615d1e5SSatish Balay #undef __FUNC__ 4995615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 5008f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A) 5011eb62cbbSBarry Smith { 50244a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 503dbd7a890SLois Curfman McInnes int ierr; 50478b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 50578b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 5061eb62cbbSBarry Smith return 0; 5071eb62cbbSBarry Smith } 5081eb62cbbSBarry Smith 5091eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 5101eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 5111eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 5121eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 5131eb62cbbSBarry Smith routine. 5141eb62cbbSBarry Smith */ 5155615d1e5SSatish Balay #undef __FUNC__ 5165615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 5178f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 5181eb62cbbSBarry Smith { 51944a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 52017699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 5216abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 52217699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 5235392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 524d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 525d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 5261eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 5271eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 5281eb62cbbSBarry Smith IS istmp; 5291eb62cbbSBarry Smith 53077c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 53178b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 5321eb62cbbSBarry Smith 5331eb62cbbSBarry Smith /* first count number of contributors to each processor */ 5340452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 535cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 5360452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 5371eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5381eb62cbbSBarry Smith idx = rows[i]; 5391eb62cbbSBarry Smith found = 0; 54017699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 5411eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 5421eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 5431eb62cbbSBarry Smith } 5441eb62cbbSBarry Smith } 545e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 5461eb62cbbSBarry Smith } 54717699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 5481eb62cbbSBarry Smith 5491eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 5500452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 55117699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 55217699dbbSLois Curfman McInnes nrecvs = work[rank]; 55317699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 55417699dbbSLois Curfman McInnes nmax = work[rank]; 5550452661fSBarry Smith PetscFree(work); 5561eb62cbbSBarry Smith 5571eb62cbbSBarry Smith /* post receives: */ 5580452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 55978b31e54SBarry Smith CHKPTRQ(rvalues); 5600452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 56178b31e54SBarry Smith CHKPTRQ(recv_waits); 5621eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 563416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 5641eb62cbbSBarry Smith } 5651eb62cbbSBarry Smith 5661eb62cbbSBarry Smith /* do sends: 5671eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 5681eb62cbbSBarry Smith the ith processor 5691eb62cbbSBarry Smith */ 5700452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 5710452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 57278b31e54SBarry Smith CHKPTRQ(send_waits); 5730452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 5741eb62cbbSBarry Smith starts[0] = 0; 57517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5761eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5771eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 5781eb62cbbSBarry Smith } 5791eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 5801eb62cbbSBarry Smith 5811eb62cbbSBarry Smith starts[0] = 0; 58217699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5831eb62cbbSBarry Smith count = 0; 58417699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 5851eb62cbbSBarry Smith if (procs[i]) { 586416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 5871eb62cbbSBarry Smith } 5881eb62cbbSBarry Smith } 5890452661fSBarry Smith PetscFree(starts); 5901eb62cbbSBarry Smith 59117699dbbSLois Curfman McInnes base = owners[rank]; 5921eb62cbbSBarry Smith 5931eb62cbbSBarry Smith /* wait on receives */ 5940452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 5951eb62cbbSBarry Smith source = lens + nrecvs; 5961eb62cbbSBarry Smith count = nrecvs; slen = 0; 5971eb62cbbSBarry Smith while (count) { 598d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 5991eb62cbbSBarry Smith /* unpack receives into our local space */ 6001eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 601d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 602d6dfbf8fSBarry Smith lens[imdex] = n; 6031eb62cbbSBarry Smith slen += n; 6041eb62cbbSBarry Smith count--; 6051eb62cbbSBarry Smith } 6060452661fSBarry Smith PetscFree(recv_waits); 6071eb62cbbSBarry Smith 6081eb62cbbSBarry Smith /* move the data into the send scatter */ 6090452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 6101eb62cbbSBarry Smith count = 0; 6111eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 6121eb62cbbSBarry Smith values = rvalues + i*nmax; 6131eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 6141eb62cbbSBarry Smith lrows[count++] = values[j] - base; 6151eb62cbbSBarry Smith } 6161eb62cbbSBarry Smith } 6170452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 6180452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 6191eb62cbbSBarry Smith 6201eb62cbbSBarry Smith /* actually zap the local rows */ 621029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 622464493b3SBarry Smith PLogObjectParent(A,istmp); 6230452661fSBarry Smith PetscFree(lrows); 62478b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 62578b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 62678b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 6271eb62cbbSBarry Smith 6281eb62cbbSBarry Smith /* wait on sends */ 6291eb62cbbSBarry Smith if (nsends) { 6300452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 63178b31e54SBarry Smith CHKPTRQ(send_status); 6321eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 6330452661fSBarry Smith PetscFree(send_status); 6341eb62cbbSBarry Smith } 6350452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 6361eb62cbbSBarry Smith 6371eb62cbbSBarry Smith return 0; 6381eb62cbbSBarry Smith } 6391eb62cbbSBarry Smith 6405615d1e5SSatish Balay #undef __FUNC__ 6415615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 6428f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 6431eb62cbbSBarry Smith { 644416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 645fbd6ef76SBarry Smith int ierr,nt; 646416022c9SBarry Smith 647a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 648fbd6ef76SBarry Smith if (nt != a->n) { 649f40265daSLois Curfman McInnes SETERRQ(1,0,"Incompatible partition of A and xx"); 650fbd6ef76SBarry Smith } 65143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 65238597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 65343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 65438597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 6551eb62cbbSBarry Smith return 0; 6561eb62cbbSBarry Smith } 6571eb62cbbSBarry Smith 6585615d1e5SSatish Balay #undef __FUNC__ 6595615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 6608f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 661da3a660dSBarry Smith { 662416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 663da3a660dSBarry Smith int ierr; 66443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 66538597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 66643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 66738597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 668da3a660dSBarry Smith return 0; 669da3a660dSBarry Smith } 670da3a660dSBarry Smith 6715615d1e5SSatish Balay #undef __FUNC__ 6725615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 6738f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 674da3a660dSBarry Smith { 675416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 676da3a660dSBarry Smith int ierr; 677da3a660dSBarry Smith 678da3a660dSBarry Smith /* do nondiagonal part */ 67938597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 680da3a660dSBarry Smith /* send it on its way */ 681537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 682da3a660dSBarry Smith /* do local part */ 68338597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 684da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 685da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 686da3a660dSBarry Smith /* but is not perhaps always true. */ 687537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 688da3a660dSBarry Smith return 0; 689da3a660dSBarry Smith } 690da3a660dSBarry Smith 6915615d1e5SSatish Balay #undef __FUNC__ 6925615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 6938f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 694da3a660dSBarry Smith { 695416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 696da3a660dSBarry Smith int ierr; 697da3a660dSBarry Smith 698da3a660dSBarry Smith /* do nondiagonal part */ 69938597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 700da3a660dSBarry Smith /* send it on its way */ 701537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 702da3a660dSBarry Smith /* do local part */ 70338597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 704da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 705da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 706da3a660dSBarry Smith /* but is not perhaps always true. */ 7070a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 708da3a660dSBarry Smith return 0; 709da3a660dSBarry Smith } 710da3a660dSBarry Smith 7111eb62cbbSBarry Smith /* 7121eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 7131eb62cbbSBarry Smith diagonal block 7141eb62cbbSBarry Smith */ 7155615d1e5SSatish Balay #undef __FUNC__ 7165615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 7178f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 7181eb62cbbSBarry Smith { 719416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 72044cd7ae7SLois Curfman McInnes if (a->M != a->N) 721e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 7225baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 723e3372554SBarry Smith SETERRQ(1,0,"row partition must equal col partition"); } 724416022c9SBarry Smith return MatGetDiagonal(a->A,v); 7251eb62cbbSBarry Smith } 7261eb62cbbSBarry Smith 7275615d1e5SSatish Balay #undef __FUNC__ 7285615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 7298f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A) 730052efed2SBarry Smith { 731052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 732052efed2SBarry Smith int ierr; 733052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 734052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 735052efed2SBarry Smith return 0; 736052efed2SBarry Smith } 737052efed2SBarry Smith 7385615d1e5SSatish Balay #undef __FUNC__ 7395eea60f9SBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 7408f6be9afSLois Curfman McInnes int MatDestroy_MPIAIJ(PetscObject obj) 7411eb62cbbSBarry Smith { 7421eb62cbbSBarry Smith Mat mat = (Mat) obj; 74344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 7441eb62cbbSBarry Smith int ierr; 745*83e2fdc7SBarry Smith 746a5a9c739SBarry Smith #if defined(PETSC_LOG) 7476e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 748a5a9c739SBarry Smith #endif 749*83e2fdc7SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 7500452661fSBarry Smith PetscFree(aij->rowners); 75178b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 75278b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 7530452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 7540452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 7551eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 756a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 7577a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 7580452661fSBarry Smith PetscFree(aij); 75990f02eecSBarry Smith if (mat->mapping) { 76090f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 76190f02eecSBarry Smith } 762a5a9c739SBarry Smith PLogObjectDestroy(mat); 7630452661fSBarry Smith PetscHeaderDestroy(mat); 7641eb62cbbSBarry Smith return 0; 7651eb62cbbSBarry Smith } 766ee50ffe9SBarry Smith 7675615d1e5SSatish Balay #undef __FUNC__ 7685eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 7698f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 7701eb62cbbSBarry Smith { 771416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 772416022c9SBarry Smith int ierr; 773416022c9SBarry Smith 77417699dbbSLois Curfman McInnes if (aij->size == 1) { 775416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 776416022c9SBarry Smith } 777e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 778416022c9SBarry Smith return 0; 779416022c9SBarry Smith } 780416022c9SBarry Smith 7815615d1e5SSatish Balay #undef __FUNC__ 7825eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 7838f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 784416022c9SBarry Smith { 78544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 786dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 787a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 788d636dbe3SBarry Smith FILE *fd; 78919bcc07fSBarry Smith ViewerType vtype; 790416022c9SBarry Smith 79119bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 79219bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 79390ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 7940a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7954e220ebcSLois Curfman McInnes MatInfo info; 7964e220ebcSLois Curfman McInnes int flg; 797a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 79890ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7994e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 80095e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 80177c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 80295e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 8034e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 80495e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 8054e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 8064e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 8074e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 8084e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 8094e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 810a56f8943SBarry Smith fflush(fd); 81177c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 812a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 813416022c9SBarry Smith return 0; 814416022c9SBarry Smith } 8150a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 81608480c60SBarry Smith return 0; 81708480c60SBarry Smith } 818416022c9SBarry Smith } 819416022c9SBarry Smith 82019bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 82119bcc07fSBarry Smith Draw draw; 82219bcc07fSBarry Smith PetscTruth isnull; 82319bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 82419bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 82519bcc07fSBarry Smith } 82619bcc07fSBarry Smith 82719bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 82890ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 82977c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 830d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 83117699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 8321eb62cbbSBarry Smith aij->cend); 83378b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 83478b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 835d13ab20cSBarry Smith fflush(fd); 83677c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 837d13ab20cSBarry Smith } 838416022c9SBarry Smith else { 839a56f8943SBarry Smith int size = aij->size; 840a56f8943SBarry Smith rank = aij->rank; 84117699dbbSLois Curfman McInnes if (size == 1) { 84278b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 84395373324SBarry Smith } 84495373324SBarry Smith else { 84595373324SBarry Smith /* assemble the entire matrix onto first processor. */ 84695373324SBarry Smith Mat A; 847ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 8482eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 84995373324SBarry Smith Scalar *a; 8502ee70a88SLois Curfman McInnes 85117699dbbSLois Curfman McInnes if (!rank) { 852b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 853c451ab25SLois Curfman McInnes CHKERRQ(ierr); 85495373324SBarry Smith } 85595373324SBarry Smith else { 856b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 857c451ab25SLois Curfman McInnes CHKERRQ(ierr); 85895373324SBarry Smith } 859464493b3SBarry Smith PLogObjectParent(mat,A); 860416022c9SBarry Smith 86195373324SBarry Smith /* copy over the A part */ 862ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 8632ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 86495373324SBarry Smith row = aij->rstart; 865dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 86695373324SBarry Smith for ( i=0; i<m; i++ ) { 867416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 86895373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 86995373324SBarry Smith } 8702ee70a88SLois Curfman McInnes aj = Aloc->j; 871dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 87295373324SBarry Smith 87395373324SBarry Smith /* copy over the B part */ 874ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 8752ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 87695373324SBarry Smith row = aij->rstart; 8770452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 878dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 87995373324SBarry Smith for ( i=0; i<m; i++ ) { 880416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 88195373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 88295373324SBarry Smith } 8830452661fSBarry Smith PetscFree(ct); 8846d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8856d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 88617699dbbSLois Curfman McInnes if (!rank) { 88778b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 88895373324SBarry Smith } 88978b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 89095373324SBarry Smith } 89195373324SBarry Smith } 8921eb62cbbSBarry Smith return 0; 8931eb62cbbSBarry Smith } 8941eb62cbbSBarry Smith 8955615d1e5SSatish Balay #undef __FUNC__ 8965eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */ 8978f6be9afSLois Curfman McInnes int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 898416022c9SBarry Smith { 899416022c9SBarry Smith Mat mat = (Mat) obj; 900416022c9SBarry Smith int ierr; 90119bcc07fSBarry Smith ViewerType vtype; 902416022c9SBarry Smith 90319bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 90419bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 90519bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 906d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 907416022c9SBarry Smith } 90819bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 909416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 910416022c9SBarry Smith } 911416022c9SBarry Smith return 0; 912416022c9SBarry Smith } 913416022c9SBarry Smith 9141eb62cbbSBarry Smith /* 9151eb62cbbSBarry Smith This has to provide several versions. 9161eb62cbbSBarry Smith 9171eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 9181eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 919d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 9201eb62cbbSBarry Smith */ 9215615d1e5SSatish Balay #undef __FUNC__ 9225615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 9238f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 924dbb450caSBarry Smith double fshift,int its,Vec xx) 9258a729477SBarry Smith { 92644a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 927d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 928ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 929c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 9306abc6512SBarry Smith int ierr,*idx, *diag; 931416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 9328a729477SBarry Smith 933d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 934dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 935dbb450caSBarry Smith ls = ls + shift; 936*83e2fdc7SBarry Smith if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 937d6dfbf8fSBarry Smith diag = A->diag; 938c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 939da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 94038597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 941da3a660dSBarry Smith } 94243a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 94378b31e54SBarry Smith CHKERRQ(ierr); 94443a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 94578b31e54SBarry Smith CHKERRQ(ierr); 946d6dfbf8fSBarry Smith while (its--) { 947d6dfbf8fSBarry Smith /* go down through the rows */ 948d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 949d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 950dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 951dbb450caSBarry Smith v = A->a + A->i[i] + shift; 952d6dfbf8fSBarry Smith sum = b[i]; 953d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 954dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 955d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 956dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 957dbb450caSBarry Smith v = B->a + B->i[i] + shift; 958d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 95955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 960d6dfbf8fSBarry Smith } 961d6dfbf8fSBarry Smith /* come up through the rows */ 962d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 963d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 964dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 965dbb450caSBarry Smith v = A->a + A->i[i] + shift; 966d6dfbf8fSBarry Smith sum = b[i]; 967d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 968dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 969d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 970dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 971dbb450caSBarry Smith v = B->a + B->i[i] + shift; 972d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 97355a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 974d6dfbf8fSBarry Smith } 975d6dfbf8fSBarry Smith } 976d6dfbf8fSBarry Smith } 977d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 978da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 97938597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 980da3a660dSBarry Smith } 98143a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 98278b31e54SBarry Smith CHKERRQ(ierr); 98343a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 98478b31e54SBarry Smith CHKERRQ(ierr); 985d6dfbf8fSBarry Smith while (its--) { 986d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 987d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 988dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 989dbb450caSBarry Smith v = A->a + A->i[i] + shift; 990d6dfbf8fSBarry Smith sum = b[i]; 991d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 992dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 993d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 994dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 995dbb450caSBarry Smith v = B->a + B->i[i] + shift; 996d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 99755a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 998d6dfbf8fSBarry Smith } 999d6dfbf8fSBarry Smith } 1000d6dfbf8fSBarry Smith } 1001d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1002da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 100338597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 1004da3a660dSBarry Smith } 100543a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 100678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 100743a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 100878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 1009d6dfbf8fSBarry Smith while (its--) { 1010d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 1011d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 1012dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 1013dbb450caSBarry Smith v = A->a + A->i[i] + shift; 1014d6dfbf8fSBarry Smith sum = b[i]; 1015d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 1016dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1017d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1018dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1019dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1020d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 102155a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1022d6dfbf8fSBarry Smith } 1023d6dfbf8fSBarry Smith } 1024d6dfbf8fSBarry Smith } 1025c16cb8f2SBarry Smith else { 1026e3372554SBarry Smith SETERRQ(1,0,"Option not supported"); 1027c16cb8f2SBarry Smith } 10288a729477SBarry Smith return 0; 10298a729477SBarry Smith } 1030a66be287SLois Curfman McInnes 10315615d1e5SSatish Balay #undef __FUNC__ 10325eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */ 10338f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1034a66be287SLois Curfman McInnes { 1035a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1036a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 10374e220ebcSLois Curfman McInnes int ierr; 10384e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 1039a66be287SLois Curfman McInnes 10404e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 10414e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 10424e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 10434e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 10444e220ebcSLois Curfman McInnes info->block_size = 1.0; 10454e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 10464e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 10474e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 10484e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 10494e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 10504e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 1051a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 10524e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 10534e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 10544e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 10554e220ebcSLois Curfman McInnes info->memory = isend[3]; 10564e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1057a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 10584e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 10594e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10604e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10614e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10624e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10634e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1064a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 10654e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 10664e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10674e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10684e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10694e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10704e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1071a66be287SLois Curfman McInnes } 10724e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 10734e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 10744e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 10754e220ebcSLois Curfman McInnes 1076a66be287SLois Curfman McInnes return 0; 1077a66be287SLois Curfman McInnes } 1078a66be287SLois Curfman McInnes 1079299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1080299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1081299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1082299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1083299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1084299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1085299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1086299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1087299609e3SLois Curfman McInnes 10885615d1e5SSatish Balay #undef __FUNC__ 10895eea60f9SBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */ 10908f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op) 1091c74985f6SBarry Smith { 1092c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1093c74985f6SBarry Smith 10946d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10956d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10966da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1097c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 109896854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1099c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1100b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1101b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1102b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1103aeafbbfcSLois Curfman McInnes a->roworiented = 1; 1104c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1105c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1106b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 11076da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 11086d4a8577SBarry Smith op == MAT_SYMMETRIC || 11096d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 11106d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 111194a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 11126d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 11134b0e389bSBarry Smith a->roworiented = 0; 11144b0e389bSBarry Smith MatSetOption(a->A,op); 11154b0e389bSBarry Smith MatSetOption(a->B,op); 11162b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 111790f02eecSBarry Smith a->donotstash = 1; 111890f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1119e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1120c0bbcb79SLois Curfman McInnes else 1121e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1122c74985f6SBarry Smith return 0; 1123c74985f6SBarry Smith } 1124c74985f6SBarry Smith 11255615d1e5SSatish Balay #undef __FUNC__ 11265eea60f9SBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */ 11278f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1128c74985f6SBarry Smith { 112944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1130c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1131c74985f6SBarry Smith return 0; 1132c74985f6SBarry Smith } 1133c74985f6SBarry Smith 11345615d1e5SSatish Balay #undef __FUNC__ 11355eea60f9SBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */ 11368f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1137c74985f6SBarry Smith { 113844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1139b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1140c74985f6SBarry Smith return 0; 1141c74985f6SBarry Smith } 1142c74985f6SBarry Smith 11435615d1e5SSatish Balay #undef __FUNC__ 11445eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */ 11458f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1146c74985f6SBarry Smith { 114744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1148c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1149c74985f6SBarry Smith return 0; 1150c74985f6SBarry Smith } 1151c74985f6SBarry Smith 11526d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11536d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11546d84be18SBarry Smith 11555615d1e5SSatish Balay #undef __FUNC__ 11565615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 11576d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 115839e00950SLois Curfman McInnes { 1159154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 116070f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1161154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1162154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 116370f0671dSBarry Smith int *cmap, *idx_p; 116439e00950SLois Curfman McInnes 1165e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 11667a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 11677a0afa10SBarry Smith 116870f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 11697a0afa10SBarry Smith /* 11707a0afa10SBarry Smith allocate enough space to hold information from the longest row. 11717a0afa10SBarry Smith */ 11727a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1173c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1174c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 11757a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 11767a0afa10SBarry Smith if (max < tmp) { max = tmp; } 11777a0afa10SBarry Smith } 11787a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 11797a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 11807a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 11817a0afa10SBarry Smith } 11827a0afa10SBarry Smith 1183e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1184abc0e9e4SLois Curfman McInnes lrow = row - rstart; 118539e00950SLois Curfman McInnes 1186154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1187154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1188154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 118938597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 119038597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1191154123eaSLois Curfman McInnes nztot = nzA + nzB; 1192154123eaSLois Curfman McInnes 119370f0671dSBarry Smith cmap = mat->garray; 1194154123eaSLois Curfman McInnes if (v || idx) { 1195154123eaSLois Curfman McInnes if (nztot) { 1196154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 119770f0671dSBarry Smith int imark = -1; 1198154123eaSLois Curfman McInnes if (v) { 119970f0671dSBarry Smith *v = v_p = mat->rowvalues; 120039e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 120170f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1202154123eaSLois Curfman McInnes else break; 1203154123eaSLois Curfman McInnes } 1204154123eaSLois Curfman McInnes imark = i; 120570f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 120670f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1207154123eaSLois Curfman McInnes } 1208154123eaSLois Curfman McInnes if (idx) { 120970f0671dSBarry Smith *idx = idx_p = mat->rowindices; 121070f0671dSBarry Smith if (imark > -1) { 121170f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 121270f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 121370f0671dSBarry Smith } 121470f0671dSBarry Smith } else { 1215154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 121670f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1217154123eaSLois Curfman McInnes else break; 1218154123eaSLois Curfman McInnes } 1219154123eaSLois Curfman McInnes imark = i; 122070f0671dSBarry Smith } 122170f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 122270f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 122339e00950SLois Curfman McInnes } 122439e00950SLois Curfman McInnes } 12251ca473b0SSatish Balay else { 12261ca473b0SSatish Balay if (idx) *idx = 0; 12271ca473b0SSatish Balay if (v) *v = 0; 12281ca473b0SSatish Balay } 1229154123eaSLois Curfman McInnes } 123039e00950SLois Curfman McInnes *nz = nztot; 123138597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 123238597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 123339e00950SLois Curfman McInnes return 0; 123439e00950SLois Curfman McInnes } 123539e00950SLois Curfman McInnes 12365615d1e5SSatish Balay #undef __FUNC__ 12375eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */ 12386d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 123939e00950SLois Curfman McInnes { 12407a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 12417a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1242e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 12437a0afa10SBarry Smith } 12447a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 124539e00950SLois Curfman McInnes return 0; 124639e00950SLois Curfman McInnes } 124739e00950SLois Curfman McInnes 12485615d1e5SSatish Balay #undef __FUNC__ 12495615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 12508f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1251855ac2c5SLois Curfman McInnes { 1252855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1253ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1254416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1255416022c9SBarry Smith double sum = 0.0; 125604ca555eSLois Curfman McInnes Scalar *v; 125704ca555eSLois Curfman McInnes 125817699dbbSLois Curfman McInnes if (aij->size == 1) { 125914183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 126037fa93a5SLois Curfman McInnes } else { 126104ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 126204ca555eSLois Curfman McInnes v = amat->a; 126304ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 126404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 126504ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 126604ca555eSLois Curfman McInnes #else 126704ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 126804ca555eSLois Curfman McInnes #endif 126904ca555eSLois Curfman McInnes } 127004ca555eSLois Curfman McInnes v = bmat->a; 127104ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 127204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 127304ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 127404ca555eSLois Curfman McInnes #else 127504ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 127604ca555eSLois Curfman McInnes #endif 127704ca555eSLois Curfman McInnes } 12786d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 127904ca555eSLois Curfman McInnes *norm = sqrt(*norm); 128004ca555eSLois Curfman McInnes } 128104ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 128204ca555eSLois Curfman McInnes double *tmp, *tmp2; 128304ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 1284758f045eSSatish Balay tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1285758f045eSSatish Balay tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1286cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 128704ca555eSLois Curfman McInnes *norm = 0.0; 128804ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 128904ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1290579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 129104ca555eSLois Curfman McInnes } 129204ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 129304ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1294579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 129504ca555eSLois Curfman McInnes } 12966d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 129704ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 129804ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 129904ca555eSLois Curfman McInnes } 13000452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 130104ca555eSLois Curfman McInnes } 130204ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1303515d9167SLois Curfman McInnes double ntemp = 0.0; 130404ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1305dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 130604ca555eSLois Curfman McInnes sum = 0.0; 130704ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1308cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 130904ca555eSLois Curfman McInnes } 1310dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 131104ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1312cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 131304ca555eSLois Curfman McInnes } 1314515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 131504ca555eSLois Curfman McInnes } 13166d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 131704ca555eSLois Curfman McInnes } 131804ca555eSLois Curfman McInnes else { 1319e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 132004ca555eSLois Curfman McInnes } 132137fa93a5SLois Curfman McInnes } 1322855ac2c5SLois Curfman McInnes return 0; 1323855ac2c5SLois Curfman McInnes } 1324855ac2c5SLois Curfman McInnes 13255615d1e5SSatish Balay #undef __FUNC__ 13265615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 13278f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1328b7c46309SBarry Smith { 1329b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1330dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1331416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1332416022c9SBarry Smith Mat B; 1333b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1334b7c46309SBarry Smith Scalar *array; 1335b7c46309SBarry Smith 13363638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 1337e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 1338b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1339b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1340b7c46309SBarry Smith 1341b7c46309SBarry Smith /* copy over the A part */ 1342ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1343b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1344b7c46309SBarry Smith row = a->rstart; 1345dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1346b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1347416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1348b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1349b7c46309SBarry Smith } 1350b7c46309SBarry Smith aj = Aloc->j; 13514af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1352b7c46309SBarry Smith 1353b7c46309SBarry Smith /* copy over the B part */ 1354ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1355b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1356b7c46309SBarry Smith row = a->rstart; 13570452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1358dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1359b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1360416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1361b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1362b7c46309SBarry Smith } 13630452661fSBarry Smith PetscFree(ct); 13646d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13656d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13663638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 13670de55854SLois Curfman McInnes *matout = B; 13680de55854SLois Curfman McInnes } else { 13690de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 13700452661fSBarry Smith PetscFree(a->rowners); 13710de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 13720de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 13730452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 13740452661fSBarry Smith if (a->garray) PetscFree(a->garray); 13750de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1376a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 13770452661fSBarry Smith PetscFree(a); 1378f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 13790452661fSBarry Smith PetscHeaderDestroy(B); 13800de55854SLois Curfman McInnes } 1381b7c46309SBarry Smith return 0; 1382b7c46309SBarry Smith } 1383b7c46309SBarry Smith 13845615d1e5SSatish Balay #undef __FUNC__ 13855615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 13864b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1387a008b906SSatish Balay { 13884b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13894b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1390a008b906SSatish Balay int ierr,s1,s2,s3; 1391a008b906SSatish Balay 13924b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 13934b967eb1SSatish Balay if (rr) { 13944b967eb1SSatish Balay s3 = aij->n; 13954b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 1396e3372554SBarry Smith if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 13974b967eb1SSatish Balay /* Overlap communication with computation. */ 139843a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1399a008b906SSatish Balay } 14004b967eb1SSatish Balay if (ll) { 14014b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 1402e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1403c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 14044b967eb1SSatish Balay } 14054b967eb1SSatish Balay /* scale the diagonal block */ 1406c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 14074b967eb1SSatish Balay 14084b967eb1SSatish Balay if (rr) { 14094b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 141043a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1411c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 14124b967eb1SSatish Balay } 14134b967eb1SSatish Balay 1414a008b906SSatish Balay return 0; 1415a008b906SSatish Balay } 1416a008b906SSatish Balay 1417a008b906SSatish Balay 1418682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 14195615d1e5SSatish Balay #undef __FUNC__ 14205eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */ 14218f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A) 1422682d7d0cSBarry Smith { 1423682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1424682d7d0cSBarry Smith 1425682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1426682d7d0cSBarry Smith else return 0; 1427682d7d0cSBarry Smith } 1428682d7d0cSBarry Smith 14295615d1e5SSatish Balay #undef __FUNC__ 14305eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */ 14318f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 14325a838052SSatish Balay { 14335a838052SSatish Balay *bs = 1; 14345a838052SSatish Balay return 0; 14355a838052SSatish Balay } 14365615d1e5SSatish Balay #undef __FUNC__ 14375eea60f9SBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */ 14388f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A) 1439bb5a7306SBarry Smith { 1440bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1441bb5a7306SBarry Smith int ierr; 1442bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1443bb5a7306SBarry Smith return 0; 1444bb5a7306SBarry Smith } 1445bb5a7306SBarry Smith 14468f6be9afSLois Curfman McInnes extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 14472f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 14480a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 14490a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 14508a729477SBarry Smith /* -------------------------------------------------------------------*/ 14512ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 145239e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 145344a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 145444a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 145536ce4990SBarry Smith 0,0, 145636ce4990SBarry Smith 0,0, 145736ce4990SBarry Smith 0,0, 145844a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1459b7c46309SBarry Smith MatTranspose_MPIAIJ, 1460a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1461a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1462ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 14631eb62cbbSBarry Smith 0, 1464299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 146536ce4990SBarry Smith 0,0,0,0, 1466d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 146736ce4990SBarry Smith 0,0, 146894a9d846SBarry Smith 0,0,MatConvertSameType_MPIAIJ,0,0, 1469b49de8d1SLois Curfman McInnes 0,0,0, 1470598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1471052efed2SBarry Smith MatPrintHelp_MPIAIJ, 14723b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 14730a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 1474bb5a7306SBarry Smith MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 147536ce4990SBarry Smith 14768a729477SBarry Smith 14775615d1e5SSatish Balay #undef __FUNC__ 14785615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 14791987afe7SBarry Smith /*@C 1480ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 14813a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 14823a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 14833a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 14843a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 14858a729477SBarry Smith 14868a729477SBarry Smith Input Parameters: 14871eb62cbbSBarry Smith . comm - MPI communicator 14887d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 148992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 149092e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 14911a3896d6SBarry Smith . n - This value should be the same as the local size used in creating the 14921a3896d6SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 14931a3896d6SBarry Smith calculated if N is given) For square matrices n is almost always m. 14947d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 149592e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1496ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1497ff756334SLois Curfman McInnes (same for all local rows) 14982bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 149992e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 15002bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 15012bd5e0b2SLois Curfman McInnes it is zero. 15022bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1503ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 15042bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 15052bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 15062bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 15078a729477SBarry Smith 1508ff756334SLois Curfman McInnes Output Parameter: 150944cd7ae7SLois Curfman McInnes . A - the matrix 15108a729477SBarry Smith 1511ff756334SLois Curfman McInnes Notes: 1512ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1513ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 15140002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 15150002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1516ff756334SLois Curfman McInnes 1517ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1518ff756334SLois Curfman McInnes (possibly both). 1519ff756334SLois Curfman McInnes 15205511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 15215511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 15225511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 15235511cfe3SLois Curfman McInnes 15245511cfe3SLois Curfman McInnes Options Database Keys: 15255511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 15266e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 15276e62573dSLois Curfman McInnes $ (max limit=5) 15286e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 15296e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 15306e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 15315511cfe3SLois Curfman McInnes 1532e0245417SLois Curfman McInnes Storage Information: 1533e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1534e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1535e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1536e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1537e0245417SLois Curfman McInnes 1538e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 15395ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 15405ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 15415ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 15425ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1543ff756334SLois Curfman McInnes 15445511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 15455511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 15462191d07cSBarry Smith 1547b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1548b810aeb4SBarry Smith $ ------------------- 15495511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 15505511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 15515511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 15525511cfe3SLois Curfman McInnes $ ------------------- 1553b810aeb4SBarry Smith $ 1554b810aeb4SBarry Smith 15555511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 15565511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 15575511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 15585511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 15595511cfe3SLois Curfman McInnes 15605511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 15615511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 15625511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 15633d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 156492e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15656da5968aSLois Curfman McInnes matrices. 15663a511b96SLois Curfman McInnes 1567dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1568ff756334SLois Curfman McInnes 1569fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 15708a729477SBarry Smith @*/ 15711eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 157244cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 15738a729477SBarry Smith { 157444cd7ae7SLois Curfman McInnes Mat B; 157544cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 157636ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1577416022c9SBarry Smith 157844cd7ae7SLois Curfman McInnes *A = 0; 1579f09e8eb9SSatish Balay PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm); 158044cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 158144cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 158244cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 158344cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 158436ce4990SBarry Smith MPI_Comm_size(comm,&size); 158536ce4990SBarry Smith if (size == 1) { 158636ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 158736ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 158836ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 158936ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 159036ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 159136ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 159236ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 159336ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 159436ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 159536ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 159636ce4990SBarry Smith } 159744cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 159844cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 159944cd7ae7SLois Curfman McInnes B->factor = 0; 160044cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 160190f02eecSBarry Smith B->mapping = 0; 1602d6dfbf8fSBarry Smith 160347794344SBarry Smith B->insertmode = NOT_SET_VALUES; 160444cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 160544cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 16061eb62cbbSBarry Smith 1607b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1608e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 16091987afe7SBarry Smith 1610dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 16111eb62cbbSBarry Smith work[0] = m; work[1] = n; 1612d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1613dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1614dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 16151eb62cbbSBarry Smith } 161644cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 161744cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 161844cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 161944cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 162044cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 162144cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 16221eb62cbbSBarry Smith 16231eb62cbbSBarry Smith /* build local table of row and column ownerships */ 162444cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1625f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1626603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 162744cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 162844cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 162944cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 163044cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 16318a729477SBarry Smith } 163244cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 163344cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 163444cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 163544cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 163644cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 163744cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 16388a729477SBarry Smith } 163944cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 164044cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 16418a729477SBarry Smith 16425ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1643029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 164444cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 16457b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1646029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 164744cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 16488a729477SBarry Smith 16491eb62cbbSBarry Smith /* build cache for off array entries formed */ 165044cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 165190f02eecSBarry Smith b->donotstash = 0; 165244cd7ae7SLois Curfman McInnes b->colmap = 0; 165344cd7ae7SLois Curfman McInnes b->garray = 0; 165444cd7ae7SLois Curfman McInnes b->roworiented = 1; 16558a729477SBarry Smith 16561eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 165744cd7ae7SLois Curfman McInnes b->lvec = 0; 165844cd7ae7SLois Curfman McInnes b->Mvctx = 0; 16598a729477SBarry Smith 16607a0afa10SBarry Smith /* stuff for MatGetRow() */ 166144cd7ae7SLois Curfman McInnes b->rowindices = 0; 166244cd7ae7SLois Curfman McInnes b->rowvalues = 0; 166344cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 16647a0afa10SBarry Smith 166544cd7ae7SLois Curfman McInnes *A = B; 1666d6dfbf8fSBarry Smith return 0; 1667d6dfbf8fSBarry Smith } 1668c74985f6SBarry Smith 16695615d1e5SSatish Balay #undef __FUNC__ 16705615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ" 16718f6be9afSLois Curfman McInnes int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1672d6dfbf8fSBarry Smith { 1673d6dfbf8fSBarry Smith Mat mat; 1674416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1675a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1676d6dfbf8fSBarry Smith 1677416022c9SBarry Smith *newmat = 0; 1678f09e8eb9SSatish Balay PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1679a5a9c739SBarry Smith PLogObjectCreate(mat); 16800452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1681416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 168244a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 168344a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1684d6dfbf8fSBarry Smith mat->factor = matin->factor; 1685c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1686d6dfbf8fSBarry Smith 168744cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 168844cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 168944cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 169044cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1691d6dfbf8fSBarry Smith 1692416022c9SBarry Smith a->rstart = oldmat->rstart; 1693416022c9SBarry Smith a->rend = oldmat->rend; 1694416022c9SBarry Smith a->cstart = oldmat->cstart; 1695416022c9SBarry Smith a->cend = oldmat->cend; 169617699dbbSLois Curfman McInnes a->size = oldmat->size; 169717699dbbSLois Curfman McInnes a->rank = oldmat->rank; 169847794344SBarry Smith mat->insertmode = NOT_SET_VALUES; 1699bcd2baecSBarry Smith a->rowvalues = 0; 1700bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1701d6dfbf8fSBarry Smith 1702603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1703f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1704603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1705603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1706416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 17072ee70a88SLois Curfman McInnes if (oldmat->colmap) { 17080452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1709416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1710416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1711416022c9SBarry Smith } else a->colmap = 0; 17123f41c07dSBarry Smith if (oldmat->garray) { 17133f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 17143f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1715464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 17163f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1717416022c9SBarry Smith } else a->garray = 0; 1718d6dfbf8fSBarry Smith 1719416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1720416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1721a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1722416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1723416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1724416022c9SBarry Smith PLogObjectParent(mat,a->A); 1725416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1726416022c9SBarry Smith PLogObjectParent(mat,a->B); 17275dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 172825cdf11fSBarry Smith if (flg) { 1729682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1730682d7d0cSBarry Smith } 17318a729477SBarry Smith *newmat = mat; 17328a729477SBarry Smith return 0; 17338a729477SBarry Smith } 1734416022c9SBarry Smith 173577c4ece6SBarry Smith #include "sys.h" 1736416022c9SBarry Smith 17375615d1e5SSatish Balay #undef __FUNC__ 17385615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 173919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1740416022c9SBarry Smith { 1741d65a2f8fSBarry Smith Mat A; 1742416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1743d65a2f8fSBarry Smith Scalar *vals,*svals; 174419bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1745416022c9SBarry Smith MPI_Status status; 174617699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1747d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 174819bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1749416022c9SBarry Smith 175017699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 175117699dbbSLois Curfman McInnes if (!rank) { 175290ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 175377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1754e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1755416022c9SBarry Smith } 1756416022c9SBarry Smith 1757416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1758416022c9SBarry Smith M = header[1]; N = header[2]; 1759416022c9SBarry Smith /* determine ownership of all rows */ 176017699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 17610452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1762416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1763416022c9SBarry Smith rowners[0] = 0; 176417699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1765416022c9SBarry Smith rowners[i] += rowners[i-1]; 1766416022c9SBarry Smith } 176717699dbbSLois Curfman McInnes rstart = rowners[rank]; 176817699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1769416022c9SBarry Smith 1770416022c9SBarry Smith /* distribute row lengths to all processors */ 17710452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1772416022c9SBarry Smith offlens = ourlens + (rend-rstart); 177317699dbbSLois Curfman McInnes if (!rank) { 17740452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 177577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 17760452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 177717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1778d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 17790452661fSBarry Smith PetscFree(sndcounts); 1780416022c9SBarry Smith } 1781416022c9SBarry Smith else { 1782416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1783416022c9SBarry Smith } 1784416022c9SBarry Smith 178517699dbbSLois Curfman McInnes if (!rank) { 1786416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 17870452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1788cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 178917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1790416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1791416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1792416022c9SBarry Smith } 1793416022c9SBarry Smith } 17940452661fSBarry Smith PetscFree(rowlengths); 1795416022c9SBarry Smith 1796416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1797416022c9SBarry Smith maxnz = 0; 179817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 17990452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1800416022c9SBarry Smith } 18010452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1802416022c9SBarry Smith 1803416022c9SBarry Smith /* read in my part of the matrix column indices */ 1804416022c9SBarry Smith nz = procsnz[0]; 18050452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 180677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1807d65a2f8fSBarry Smith 1808d65a2f8fSBarry Smith /* read in every one elses and ship off */ 180917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1810d65a2f8fSBarry Smith nz = procsnz[i]; 181177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1812d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1813d65a2f8fSBarry Smith } 18140452661fSBarry Smith PetscFree(cols); 1815416022c9SBarry Smith } 1816416022c9SBarry Smith else { 1817416022c9SBarry Smith /* determine buffer space needed for message */ 1818416022c9SBarry Smith nz = 0; 1819416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1820416022c9SBarry Smith nz += ourlens[i]; 1821416022c9SBarry Smith } 18220452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1823416022c9SBarry Smith 1824416022c9SBarry Smith /* receive message of column indices*/ 1825d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1826416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1827e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1828416022c9SBarry Smith } 1829416022c9SBarry Smith 1830416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1831cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1832416022c9SBarry Smith jj = 0; 1833416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1834416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1835d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1836416022c9SBarry Smith jj++; 1837416022c9SBarry Smith } 1838416022c9SBarry Smith } 1839d65a2f8fSBarry Smith 1840d65a2f8fSBarry Smith /* create our matrix */ 1841416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1842416022c9SBarry Smith ourlens[i] -= offlens[i]; 1843416022c9SBarry Smith } 1844d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1845d65a2f8fSBarry Smith A = *newmat; 18466d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1847d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1848d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1849d65a2f8fSBarry Smith } 1850416022c9SBarry Smith 185117699dbbSLois Curfman McInnes if (!rank) { 18520452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1853416022c9SBarry Smith 1854416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1855416022c9SBarry Smith nz = procsnz[0]; 185677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1857d65a2f8fSBarry Smith 1858d65a2f8fSBarry Smith /* insert into matrix */ 1859d65a2f8fSBarry Smith jj = rstart; 1860d65a2f8fSBarry Smith smycols = mycols; 1861d65a2f8fSBarry Smith svals = vals; 1862d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1863d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1864d65a2f8fSBarry Smith smycols += ourlens[i]; 1865d65a2f8fSBarry Smith svals += ourlens[i]; 1866d65a2f8fSBarry Smith jj++; 1867416022c9SBarry Smith } 1868416022c9SBarry Smith 1869d65a2f8fSBarry Smith /* read in other processors and ship out */ 187017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1871416022c9SBarry Smith nz = procsnz[i]; 187277c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1873d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1874416022c9SBarry Smith } 18750452661fSBarry Smith PetscFree(procsnz); 1876416022c9SBarry Smith } 1877d65a2f8fSBarry Smith else { 1878d65a2f8fSBarry Smith /* receive numeric values */ 18790452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1880416022c9SBarry Smith 1881d65a2f8fSBarry Smith /* receive message of values*/ 1882d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1883d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1884e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1885d65a2f8fSBarry Smith 1886d65a2f8fSBarry Smith /* insert into matrix */ 1887d65a2f8fSBarry Smith jj = rstart; 1888d65a2f8fSBarry Smith smycols = mycols; 1889d65a2f8fSBarry Smith svals = vals; 1890d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1891d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1892d65a2f8fSBarry Smith smycols += ourlens[i]; 1893d65a2f8fSBarry Smith svals += ourlens[i]; 1894d65a2f8fSBarry Smith jj++; 1895d65a2f8fSBarry Smith } 1896d65a2f8fSBarry Smith } 18970452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1898d65a2f8fSBarry Smith 18996d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 19006d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1901416022c9SBarry Smith return 0; 1902416022c9SBarry Smith } 1903