1cb512458SBarry Smith #ifndef lint 2*2b362799SSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.197 1997/03/30 03:06:02 balay Exp balay $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 8d9942c19SSatish Balay #include "src/inline/spops.h" 98a729477SBarry Smith 109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 129e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 139e25ed09SBarry Smith length of colmap equals the global matrix length. 149e25ed09SBarry Smith */ 155615d1e5SSatish Balay #undef __FUNC__ 165eea60f9SBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */ 170a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 189e25ed09SBarry Smith { 1944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21905e6a2fSBarry Smith int n = B->n,i; 22dbb450caSBarry Smith 230452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 24464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 25cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 279e25ed09SBarry Smith return 0; 289e25ed09SBarry Smith } 299e25ed09SBarry Smith 302493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 312493cbb0SBarry Smith 325615d1e5SSatish Balay #undef __FUNC__ 335615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ" 348f6be9afSLois Curfman McInnes int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 353b2fbd54SBarry Smith PetscTruth *done) 36299609e3SLois Curfman McInnes { 37299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 38299609e3SLois Curfman McInnes int ierr; 3917699dbbSLois Curfman McInnes if (aij->size == 1) { 403b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 41e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 423b2fbd54SBarry Smith return 0; 433b2fbd54SBarry Smith } 443b2fbd54SBarry Smith 455615d1e5SSatish Balay #undef __FUNC__ 465eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */ 478f6be9afSLois Curfman McInnes int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 483b2fbd54SBarry Smith PetscTruth *done) 493b2fbd54SBarry Smith { 503b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 513b2fbd54SBarry Smith int ierr; 523b2fbd54SBarry Smith if (aij->size == 1) { 533b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 55299609e3SLois Curfman McInnes return 0; 56299609e3SLois Curfman McInnes } 57299609e3SLois Curfman McInnes 580520107fSSatish Balay #define CHUNKSIZE 15 5930770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 600520107fSSatish Balay { \ 610520107fSSatish Balay \ 620520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 6330770e4dSSatish Balay rmax = aimax[row]; nrow = ailen[row]; \ 64f5e9677aSSatish Balay col1 = col - shift; \ 65f5e9677aSSatish Balay \ 660520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 67f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 68f5e9677aSSatish Balay if (rp[_i] == col1) { \ 690520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 700520107fSSatish Balay else ap[_i] = value; \ 7130770e4dSSatish Balay goto a_noinsert; \ 720520107fSSatish Balay } \ 730520107fSSatish Balay } \ 7430770e4dSSatish Balay if (nonew) goto a_noinsert; \ 750520107fSSatish Balay if (nrow >= rmax) { \ 760520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 770520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 780520107fSSatish Balay Scalar *new_a; \ 790520107fSSatish Balay \ 800520107fSSatish Balay /* malloc new storage space */ \ 810520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 820520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 830520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 840520107fSSatish Balay new_i = new_j + new_nz; \ 850520107fSSatish Balay \ 860520107fSSatish Balay /* copy over old data into new slots */ \ 870520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 880520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 890520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 900520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 910520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 920520107fSSatish Balay len*sizeof(int)); \ 930520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 940520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 950520107fSSatish Balay len*sizeof(Scalar)); \ 960520107fSSatish Balay /* free up old matrix storage */ \ 97f5e9677aSSatish Balay \ 980520107fSSatish Balay PetscFree(a->a); \ 990520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 1000520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1010520107fSSatish Balay a->singlemalloc = 1; \ 1020520107fSSatish Balay \ 1030520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 10430770e4dSSatish Balay rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 1050520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 1060520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 1070520107fSSatish Balay a->reallocs++; \ 1080520107fSSatish Balay } \ 1090520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 1100520107fSSatish Balay /* shift up all the later entries in this row */ \ 1110520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 1120520107fSSatish Balay rp[ii+1] = rp[ii]; \ 1130520107fSSatish Balay ap[ii+1] = ap[ii]; \ 1140520107fSSatish Balay } \ 115f5e9677aSSatish Balay rp[_i] = col1; \ 1160520107fSSatish Balay ap[_i] = value; \ 11730770e4dSSatish Balay a_noinsert: ; \ 1180520107fSSatish Balay ailen[row] = nrow; \ 1190520107fSSatish Balay } 1200a198c4cSBarry Smith 12130770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 12230770e4dSSatish Balay { \ 12330770e4dSSatish Balay \ 12430770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 12530770e4dSSatish Balay rmax = bimax[row]; nrow = bilen[row]; \ 12630770e4dSSatish Balay col1 = col - shift; \ 12730770e4dSSatish Balay \ 12830770e4dSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 12930770e4dSSatish Balay if (rp[_i] > col1) break; \ 13030770e4dSSatish Balay if (rp[_i] == col1) { \ 13130770e4dSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 13230770e4dSSatish Balay else ap[_i] = value; \ 13330770e4dSSatish Balay goto b_noinsert; \ 13430770e4dSSatish Balay } \ 13530770e4dSSatish Balay } \ 13630770e4dSSatish Balay if (nonew) goto b_noinsert; \ 13730770e4dSSatish Balay if (nrow >= rmax) { \ 13830770e4dSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 13974c639caSSatish Balay int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 14030770e4dSSatish Balay Scalar *new_a; \ 14130770e4dSSatish Balay \ 14230770e4dSSatish Balay /* malloc new storage space */ \ 14374c639caSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 14430770e4dSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 14530770e4dSSatish Balay new_j = (int *) (new_a + new_nz); \ 14630770e4dSSatish Balay new_i = new_j + new_nz; \ 14730770e4dSSatish Balay \ 14830770e4dSSatish Balay /* copy over old data into new slots */ \ 14930770e4dSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 15074c639caSSatish Balay for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 15130770e4dSSatish Balay PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 15230770e4dSSatish Balay len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 15330770e4dSSatish Balay PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 15430770e4dSSatish Balay len*sizeof(int)); \ 15530770e4dSSatish Balay PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 15630770e4dSSatish Balay PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 15730770e4dSSatish Balay len*sizeof(Scalar)); \ 15830770e4dSSatish Balay /* free up old matrix storage */ \ 15930770e4dSSatish Balay \ 16074c639caSSatish Balay PetscFree(b->a); \ 16174c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 16274c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 16374c639caSSatish Balay b->singlemalloc = 1; \ 16430770e4dSSatish Balay \ 16530770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 16630770e4dSSatish Balay rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 16774c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 16874c639caSSatish Balay b->maxnz += CHUNKSIZE; \ 16974c639caSSatish Balay b->reallocs++; \ 17030770e4dSSatish Balay } \ 17174c639caSSatish Balay N = nrow++ - 1; b->nz++; \ 17230770e4dSSatish Balay /* shift up all the later entries in this row */ \ 17330770e4dSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 17430770e4dSSatish Balay rp[ii+1] = rp[ii]; \ 17530770e4dSSatish Balay ap[ii+1] = ap[ii]; \ 17630770e4dSSatish Balay } \ 17730770e4dSSatish Balay rp[_i] = col1; \ 17830770e4dSSatish Balay ap[_i] = value; \ 17930770e4dSSatish Balay b_noinsert: ; \ 18030770e4dSSatish Balay bilen[row] = nrow; \ 18130770e4dSSatish Balay } 18230770e4dSSatish Balay 1830520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1845615d1e5SSatish Balay #undef __FUNC__ 1855615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 1868f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 1878a729477SBarry Smith { 18844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1894b0e389bSBarry Smith Scalar value; 1901eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 1911eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 192905e6a2fSBarry Smith int roworiented = aij->roworiented; 1938a729477SBarry Smith 1940520107fSSatish Balay /* Some Variables required in the macro */ 1954ee7247eSSatish Balay Mat A = aij->A; 1964ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 19730770e4dSSatish Balay int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 19830770e4dSSatish Balay Scalar *aa = a->a; 19930770e4dSSatish Balay 20030770e4dSSatish Balay Mat B = aij->B; 20130770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 20230770e4dSSatish Balay int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 20330770e4dSSatish Balay Scalar *ba = b->a; 20430770e4dSSatish Balay 2054ee7247eSSatish Balay int *rp,ii,nrow,_i,rmax, N, col1; 20630770e4dSSatish Balay int nonew = a->nonew,shift = a->indexshift; 20730770e4dSSatish Balay Scalar *ap; 2084ee7247eSSatish Balay 2098a729477SBarry Smith for ( i=0; i<m; i++ ) { 2100a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 211e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 212e3372554SBarry Smith if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 2130a198c4cSBarry Smith #endif 2144b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 2154b0e389bSBarry Smith row = im[i] - rstart; 2161eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 2174b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 2184b0e389bSBarry Smith col = in[j] - cstart; 2194b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 22030770e4dSSatish Balay MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 2210520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2221eb62cbbSBarry Smith } 2230a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 224e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 225e3372554SBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 2260a198c4cSBarry Smith #endif 2271eb62cbbSBarry Smith else { 228227d817aSBarry Smith if (mat->was_assembled) { 229905e6a2fSBarry Smith if (!aij->colmap) { 230905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 231905e6a2fSBarry Smith } 232905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 233ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2342493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2354b0e389bSBarry Smith col = in[j]; 236d6dfbf8fSBarry Smith } 2379e25ed09SBarry Smith } 2384b0e389bSBarry Smith else col = in[j]; 2394b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 24030770e4dSSatish Balay MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 24130770e4dSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2421eb62cbbSBarry Smith } 2431eb62cbbSBarry Smith } 2441eb62cbbSBarry Smith } 2451eb62cbbSBarry Smith else { 24690f02eecSBarry Smith if (roworiented && !aij->donotstash) { 2474b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 2484b0e389bSBarry Smith } 2494b0e389bSBarry Smith else { 25090f02eecSBarry Smith if (!aij->donotstash) { 2514b0e389bSBarry Smith row = im[i]; 2524b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 2534b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 2544b0e389bSBarry Smith } 2554b0e389bSBarry Smith } 2561eb62cbbSBarry Smith } 2578a729477SBarry Smith } 25890f02eecSBarry Smith } 2598a729477SBarry Smith return 0; 2608a729477SBarry Smith } 2618a729477SBarry Smith 2625615d1e5SSatish Balay #undef __FUNC__ 2635615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 2648f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 265b49de8d1SLois Curfman McInnes { 266b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 267b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 268b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 269b49de8d1SLois Curfman McInnes 270b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 271e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 272e3372554SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 273b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 274b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 275b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 276e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 277e3372554SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 278b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 279b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 280b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281b49de8d1SLois Curfman McInnes } 282b49de8d1SLois Curfman McInnes else { 283905e6a2fSBarry Smith if (!aij->colmap) { 284905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285905e6a2fSBarry Smith } 286905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 287e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288d9d09a02SSatish Balay else { 289b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290b49de8d1SLois Curfman McInnes } 291b49de8d1SLois Curfman McInnes } 292b49de8d1SLois Curfman McInnes } 293d9d09a02SSatish Balay } 294b49de8d1SLois Curfman McInnes else { 295e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 296b49de8d1SLois Curfman McInnes } 297b49de8d1SLois Curfman McInnes } 298b49de8d1SLois Curfman McInnes return 0; 299b49de8d1SLois Curfman McInnes } 300b49de8d1SLois Curfman McInnes 3015615d1e5SSatish Balay #undef __FUNC__ 3025615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 3038f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 3048a729477SBarry Smith { 30544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 30717699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 30817699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 3091eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3106abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 3111eb62cbbSBarry Smith InsertMode addv; 3121eb62cbbSBarry Smith Scalar *rvalues,*svalues; 3131eb62cbbSBarry Smith 3141eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 31547794344SBarry Smith MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 316dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 317e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 3181eb62cbbSBarry Smith } 31947794344SBarry Smith mat->insertmode = addv; /* in case this processor had no cache */ 3201eb62cbbSBarry Smith 3211eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3220452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3240452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 3251eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3261eb62cbbSBarry Smith idx = aij->stash.idx[i]; 32717699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3281eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3291eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 3308a729477SBarry Smith } 3318a729477SBarry Smith } 3328a729477SBarry Smith } 33317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3341eb62cbbSBarry Smith 3351eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3360452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 33717699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 33817699dbbSLois Curfman McInnes nreceives = work[rank]; 33917699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 34017699dbbSLois Curfman McInnes nmax = work[rank]; 3410452661fSBarry Smith PetscFree(work); 3421eb62cbbSBarry Smith 3431eb62cbbSBarry Smith /* post receives: 3441eb62cbbSBarry Smith 1) each message will consist of ordered pairs 3451eb62cbbSBarry Smith (global index,value) we store the global index as a double 346d6dfbf8fSBarry Smith to simplify the message passing. 3471eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 3481eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 3491eb62cbbSBarry Smith this is a lot of wasted space. 3501eb62cbbSBarry Smith 3511eb62cbbSBarry Smith 3521eb62cbbSBarry Smith This could be done better. 3531eb62cbbSBarry Smith */ 3540452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 35578b31e54SBarry Smith CHKPTRQ(rvalues); 3560452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 35778b31e54SBarry Smith CHKPTRQ(recv_waits); 3581eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 359416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 3601eb62cbbSBarry Smith comm,recv_waits+i); 3611eb62cbbSBarry Smith } 3621eb62cbbSBarry Smith 3631eb62cbbSBarry Smith /* do sends: 3641eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3651eb62cbbSBarry Smith the ith processor 3661eb62cbbSBarry Smith */ 3670452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 3680452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36978b31e54SBarry Smith CHKPTRQ(send_waits); 3700452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3711eb62cbbSBarry Smith starts[0] = 0; 37217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3731eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3741eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 3751eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 3761eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 3771eb62cbbSBarry Smith } 3780452661fSBarry Smith PetscFree(owner); 3791eb62cbbSBarry Smith starts[0] = 0; 38017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3811eb62cbbSBarry Smith count = 0; 38217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3831eb62cbbSBarry Smith if (procs[i]) { 384416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 3851eb62cbbSBarry Smith comm,send_waits+count++); 3861eb62cbbSBarry Smith } 3871eb62cbbSBarry Smith } 3880452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 3891eb62cbbSBarry Smith 3901eb62cbbSBarry Smith /* Free cache space */ 39155908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 39278b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 3931eb62cbbSBarry Smith 3941eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 3951eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 3961eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 3971eb62cbbSBarry Smith aij->rmax = nmax; 3981eb62cbbSBarry Smith 3991eb62cbbSBarry Smith return 0; 4001eb62cbbSBarry Smith } 40144a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 4021eb62cbbSBarry Smith 4035615d1e5SSatish Balay #undef __FUNC__ 4045615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 4058f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 4061eb62cbbSBarry Smith { 40744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4081eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 409416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 410905e6a2fSBarry Smith int row,col,other_disassembled; 4111eb62cbbSBarry Smith Scalar *values,val; 41247794344SBarry Smith InsertMode addv = mat->insertmode; 4131eb62cbbSBarry Smith 4141eb62cbbSBarry Smith /* wait on receives */ 4151eb62cbbSBarry Smith while (count) { 416d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 4171eb62cbbSBarry Smith /* unpack receives into our local space */ 418d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 419c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 4201eb62cbbSBarry Smith n = n/3; 4211eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 422227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 423227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 4241eb62cbbSBarry Smith val = values[3*i+2]; 4251eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 4261eb62cbbSBarry Smith col -= aij->cstart; 4271eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 4281eb62cbbSBarry Smith } 4291eb62cbbSBarry Smith else { 43055a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 431905e6a2fSBarry Smith if (!aij->colmap) { 432905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 433905e6a2fSBarry Smith } 434905e6a2fSBarry Smith col = aij->colmap[col] - 1; 435ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4362493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 437227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 438d6dfbf8fSBarry Smith } 4399e25ed09SBarry Smith } 4401eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 4411eb62cbbSBarry Smith } 4421eb62cbbSBarry Smith } 4431eb62cbbSBarry Smith count--; 4441eb62cbbSBarry Smith } 4450452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 4461eb62cbbSBarry Smith 4471eb62cbbSBarry Smith /* wait on sends */ 4481eb62cbbSBarry Smith if (aij->nsends) { 4490a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 4501eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 4510452661fSBarry Smith PetscFree(send_status); 4521eb62cbbSBarry Smith } 4530452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 4541eb62cbbSBarry Smith 45578b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 45678b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 4571eb62cbbSBarry Smith 4582493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 4592493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 460227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 461227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 4622493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 4632493cbb0SBarry Smith } 4642493cbb0SBarry Smith 4656d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 46678b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 4675e42470aSBarry Smith } 46878b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 46978b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 4705e42470aSBarry Smith 4717a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 4728a729477SBarry Smith return 0; 4738a729477SBarry Smith } 4748a729477SBarry Smith 4755615d1e5SSatish Balay #undef __FUNC__ 4765615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 4778f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A) 4781eb62cbbSBarry Smith { 47944a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 480dbd7a890SLois Curfman McInnes int ierr; 48178b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 48278b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 4831eb62cbbSBarry Smith return 0; 4841eb62cbbSBarry Smith } 4851eb62cbbSBarry Smith 4861eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 4871eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 4881eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 4891eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 4901eb62cbbSBarry Smith routine. 4911eb62cbbSBarry Smith */ 4925615d1e5SSatish Balay #undef __FUNC__ 4935615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 4948f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 4951eb62cbbSBarry Smith { 49644a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 49717699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 4986abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 49917699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 5005392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 501d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 502d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 5031eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 5041eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 5051eb62cbbSBarry Smith IS istmp; 5061eb62cbbSBarry Smith 50777c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 50878b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 5091eb62cbbSBarry Smith 5101eb62cbbSBarry Smith /* first count number of contributors to each processor */ 5110452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 512cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 5130452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 5141eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5151eb62cbbSBarry Smith idx = rows[i]; 5161eb62cbbSBarry Smith found = 0; 51717699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 5181eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 5191eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 5201eb62cbbSBarry Smith } 5211eb62cbbSBarry Smith } 522e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 5231eb62cbbSBarry Smith } 52417699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 5251eb62cbbSBarry Smith 5261eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 5270452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 52817699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 52917699dbbSLois Curfman McInnes nrecvs = work[rank]; 53017699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 53117699dbbSLois Curfman McInnes nmax = work[rank]; 5320452661fSBarry Smith PetscFree(work); 5331eb62cbbSBarry Smith 5341eb62cbbSBarry Smith /* post receives: */ 5350452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 53678b31e54SBarry Smith CHKPTRQ(rvalues); 5370452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 53878b31e54SBarry Smith CHKPTRQ(recv_waits); 5391eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 540416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 5411eb62cbbSBarry Smith } 5421eb62cbbSBarry Smith 5431eb62cbbSBarry Smith /* do sends: 5441eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 5451eb62cbbSBarry Smith the ith processor 5461eb62cbbSBarry Smith */ 5470452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 5480452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 54978b31e54SBarry Smith CHKPTRQ(send_waits); 5500452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 5511eb62cbbSBarry Smith starts[0] = 0; 55217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5531eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5541eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 5551eb62cbbSBarry Smith } 5561eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 5571eb62cbbSBarry Smith 5581eb62cbbSBarry Smith starts[0] = 0; 55917699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5601eb62cbbSBarry Smith count = 0; 56117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 5621eb62cbbSBarry Smith if (procs[i]) { 563416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 5641eb62cbbSBarry Smith } 5651eb62cbbSBarry Smith } 5660452661fSBarry Smith PetscFree(starts); 5671eb62cbbSBarry Smith 56817699dbbSLois Curfman McInnes base = owners[rank]; 5691eb62cbbSBarry Smith 5701eb62cbbSBarry Smith /* wait on receives */ 5710452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 5721eb62cbbSBarry Smith source = lens + nrecvs; 5731eb62cbbSBarry Smith count = nrecvs; slen = 0; 5741eb62cbbSBarry Smith while (count) { 575d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 5761eb62cbbSBarry Smith /* unpack receives into our local space */ 5771eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 578d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 579d6dfbf8fSBarry Smith lens[imdex] = n; 5801eb62cbbSBarry Smith slen += n; 5811eb62cbbSBarry Smith count--; 5821eb62cbbSBarry Smith } 5830452661fSBarry Smith PetscFree(recv_waits); 5841eb62cbbSBarry Smith 5851eb62cbbSBarry Smith /* move the data into the send scatter */ 5860452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 5871eb62cbbSBarry Smith count = 0; 5881eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 5891eb62cbbSBarry Smith values = rvalues + i*nmax; 5901eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 5911eb62cbbSBarry Smith lrows[count++] = values[j] - base; 5921eb62cbbSBarry Smith } 5931eb62cbbSBarry Smith } 5940452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 5950452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 5961eb62cbbSBarry Smith 5971eb62cbbSBarry Smith /* actually zap the local rows */ 598537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 599464493b3SBarry Smith PLogObjectParent(A,istmp); 6000452661fSBarry Smith PetscFree(lrows); 60178b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 60278b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 60378b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 6041eb62cbbSBarry Smith 6051eb62cbbSBarry Smith /* wait on sends */ 6061eb62cbbSBarry Smith if (nsends) { 6070452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 60878b31e54SBarry Smith CHKPTRQ(send_status); 6091eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 6100452661fSBarry Smith PetscFree(send_status); 6111eb62cbbSBarry Smith } 6120452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 6131eb62cbbSBarry Smith 6141eb62cbbSBarry Smith return 0; 6151eb62cbbSBarry Smith } 6161eb62cbbSBarry Smith 6175615d1e5SSatish Balay #undef __FUNC__ 6185615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 6198f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 6201eb62cbbSBarry Smith { 621416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 622fbd6ef76SBarry Smith int ierr,nt; 623416022c9SBarry Smith 624a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 625fbd6ef76SBarry Smith if (nt != a->n) { 626f40265daSLois Curfman McInnes SETERRQ(1,0,"Incompatible partition of A and xx"); 627fbd6ef76SBarry Smith } 62843a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 62938597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 63043a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 63138597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 6321eb62cbbSBarry Smith return 0; 6331eb62cbbSBarry Smith } 6341eb62cbbSBarry Smith 6355615d1e5SSatish Balay #undef __FUNC__ 6365615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 6378f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 638da3a660dSBarry Smith { 639416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 640da3a660dSBarry Smith int ierr; 64143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 64238597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 64343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 64438597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 645da3a660dSBarry Smith return 0; 646da3a660dSBarry Smith } 647da3a660dSBarry Smith 6485615d1e5SSatish Balay #undef __FUNC__ 6495615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 6508f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 651da3a660dSBarry Smith { 652416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 653da3a660dSBarry Smith int ierr; 654da3a660dSBarry Smith 655da3a660dSBarry Smith /* do nondiagonal part */ 65638597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 657da3a660dSBarry Smith /* send it on its way */ 658537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 659da3a660dSBarry Smith /* do local part */ 66038597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 661da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 662da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 663da3a660dSBarry Smith /* but is not perhaps always true. */ 664537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 665da3a660dSBarry Smith return 0; 666da3a660dSBarry Smith } 667da3a660dSBarry Smith 6685615d1e5SSatish Balay #undef __FUNC__ 6695615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 6708f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 671da3a660dSBarry Smith { 672416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 673da3a660dSBarry Smith int ierr; 674da3a660dSBarry Smith 675da3a660dSBarry Smith /* do nondiagonal part */ 67638597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 677da3a660dSBarry Smith /* send it on its way */ 678537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 679da3a660dSBarry Smith /* do local part */ 68038597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 681da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 682da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 683da3a660dSBarry Smith /* but is not perhaps always true. */ 6840a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 685da3a660dSBarry Smith return 0; 686da3a660dSBarry Smith } 687da3a660dSBarry Smith 6881eb62cbbSBarry Smith /* 6891eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 6901eb62cbbSBarry Smith diagonal block 6911eb62cbbSBarry Smith */ 6925615d1e5SSatish Balay #undef __FUNC__ 6935615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 6948f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 6951eb62cbbSBarry Smith { 696416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 69744cd7ae7SLois Curfman McInnes if (a->M != a->N) 698e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 6995baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 700e3372554SBarry Smith SETERRQ(1,0,"row partition must equal col partition"); } 701416022c9SBarry Smith return MatGetDiagonal(a->A,v); 7021eb62cbbSBarry Smith } 7031eb62cbbSBarry Smith 7045615d1e5SSatish Balay #undef __FUNC__ 7055615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 7068f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A) 707052efed2SBarry Smith { 708052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 709052efed2SBarry Smith int ierr; 710052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 711052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 712052efed2SBarry Smith return 0; 713052efed2SBarry Smith } 714052efed2SBarry Smith 7155615d1e5SSatish Balay #undef __FUNC__ 7165eea60f9SBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 7178f6be9afSLois Curfman McInnes int MatDestroy_MPIAIJ(PetscObject obj) 7181eb62cbbSBarry Smith { 7191eb62cbbSBarry Smith Mat mat = (Mat) obj; 72044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 7211eb62cbbSBarry Smith int ierr; 722a5a9c739SBarry Smith #if defined(PETSC_LOG) 7236e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 724a5a9c739SBarry Smith #endif 7250452661fSBarry Smith PetscFree(aij->rowners); 72678b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 72778b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 7280452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 7290452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 7301eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 731a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 7327a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 7330452661fSBarry Smith PetscFree(aij); 73490f02eecSBarry Smith if (mat->mapping) { 73590f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 73690f02eecSBarry Smith } 737a5a9c739SBarry Smith PLogObjectDestroy(mat); 7380452661fSBarry Smith PetscHeaderDestroy(mat); 7391eb62cbbSBarry Smith return 0; 7401eb62cbbSBarry Smith } 741ee50ffe9SBarry Smith 7425615d1e5SSatish Balay #undef __FUNC__ 7435eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 7448f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 7451eb62cbbSBarry Smith { 746416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 747416022c9SBarry Smith int ierr; 748416022c9SBarry Smith 74917699dbbSLois Curfman McInnes if (aij->size == 1) { 750416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 751416022c9SBarry Smith } 752e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 753416022c9SBarry Smith return 0; 754416022c9SBarry Smith } 755416022c9SBarry Smith 7565615d1e5SSatish Balay #undef __FUNC__ 7575eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 7588f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 759416022c9SBarry Smith { 76044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 761dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 762a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 763d636dbe3SBarry Smith FILE *fd; 76419bcc07fSBarry Smith ViewerType vtype; 765416022c9SBarry Smith 76619bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 76719bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 76890ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 7690a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7704e220ebcSLois Curfman McInnes MatInfo info; 7714e220ebcSLois Curfman McInnes int flg; 772a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 77390ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7744e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 77595e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 77677c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 77795e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 7784e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 77995e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 7804e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 7814e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 7824e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 7834e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 7844e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 785a56f8943SBarry Smith fflush(fd); 78677c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 787a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 788416022c9SBarry Smith return 0; 789416022c9SBarry Smith } 7900a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 79108480c60SBarry Smith return 0; 79208480c60SBarry Smith } 793416022c9SBarry Smith } 794416022c9SBarry Smith 79519bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 79619bcc07fSBarry Smith Draw draw; 79719bcc07fSBarry Smith PetscTruth isnull; 79819bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 79919bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 80019bcc07fSBarry Smith } 80119bcc07fSBarry Smith 80219bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 80390ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 80477c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 805d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 80617699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 8071eb62cbbSBarry Smith aij->cend); 80878b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 80978b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 810d13ab20cSBarry Smith fflush(fd); 81177c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 812d13ab20cSBarry Smith } 813416022c9SBarry Smith else { 814a56f8943SBarry Smith int size = aij->size; 815a56f8943SBarry Smith rank = aij->rank; 81617699dbbSLois Curfman McInnes if (size == 1) { 81778b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 81895373324SBarry Smith } 81995373324SBarry Smith else { 82095373324SBarry Smith /* assemble the entire matrix onto first processor. */ 82195373324SBarry Smith Mat A; 822ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 8232eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 82495373324SBarry Smith Scalar *a; 8252ee70a88SLois Curfman McInnes 82617699dbbSLois Curfman McInnes if (!rank) { 827b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 828c451ab25SLois Curfman McInnes CHKERRQ(ierr); 82995373324SBarry Smith } 83095373324SBarry Smith else { 831b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 832c451ab25SLois Curfman McInnes CHKERRQ(ierr); 83395373324SBarry Smith } 834464493b3SBarry Smith PLogObjectParent(mat,A); 835416022c9SBarry Smith 83695373324SBarry Smith /* copy over the A part */ 837ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 8382ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 83995373324SBarry Smith row = aij->rstart; 840dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 84195373324SBarry Smith for ( i=0; i<m; i++ ) { 842416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 84395373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 84495373324SBarry Smith } 8452ee70a88SLois Curfman McInnes aj = Aloc->j; 846dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 84795373324SBarry Smith 84895373324SBarry Smith /* copy over the B part */ 849ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 8502ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 85195373324SBarry Smith row = aij->rstart; 8520452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 853dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 85495373324SBarry Smith for ( i=0; i<m; i++ ) { 855416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 85695373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 85795373324SBarry Smith } 8580452661fSBarry Smith PetscFree(ct); 8596d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8606d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 86117699dbbSLois Curfman McInnes if (!rank) { 86278b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 86395373324SBarry Smith } 86478b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 86595373324SBarry Smith } 86695373324SBarry Smith } 8671eb62cbbSBarry Smith return 0; 8681eb62cbbSBarry Smith } 8691eb62cbbSBarry Smith 8705615d1e5SSatish Balay #undef __FUNC__ 8715eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */ 8728f6be9afSLois Curfman McInnes int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 873416022c9SBarry Smith { 874416022c9SBarry Smith Mat mat = (Mat) obj; 875416022c9SBarry Smith int ierr; 87619bcc07fSBarry Smith ViewerType vtype; 877416022c9SBarry Smith 87819bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 87919bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 88019bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 881d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 882416022c9SBarry Smith } 88319bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 884416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 885416022c9SBarry Smith } 886416022c9SBarry Smith return 0; 887416022c9SBarry Smith } 888416022c9SBarry Smith 8891eb62cbbSBarry Smith /* 8901eb62cbbSBarry Smith This has to provide several versions. 8911eb62cbbSBarry Smith 8921eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 8931eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 894d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 8951eb62cbbSBarry Smith */ 8965615d1e5SSatish Balay #undef __FUNC__ 8975615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 8988f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 899dbb450caSBarry Smith double fshift,int its,Vec xx) 9008a729477SBarry Smith { 90144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 902d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 903ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 904c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 9056abc6512SBarry Smith int ierr,*idx, *diag; 906416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 9078a729477SBarry Smith 908d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 909dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 910dbb450caSBarry Smith ls = ls + shift; 911ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 912d6dfbf8fSBarry Smith diag = A->diag; 913c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 914da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 91538597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 916da3a660dSBarry Smith } 91743a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 91878b31e54SBarry Smith CHKERRQ(ierr); 91943a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 92078b31e54SBarry Smith CHKERRQ(ierr); 921d6dfbf8fSBarry Smith while (its--) { 922d6dfbf8fSBarry Smith /* go down through the rows */ 923d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 924d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 925dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 926dbb450caSBarry Smith v = A->a + A->i[i] + shift; 927d6dfbf8fSBarry Smith sum = b[i]; 928d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 929dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 930d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 931dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 932dbb450caSBarry Smith v = B->a + B->i[i] + shift; 933d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 93455a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 935d6dfbf8fSBarry Smith } 936d6dfbf8fSBarry Smith /* come up through the rows */ 937d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 938d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 939dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 940dbb450caSBarry Smith v = A->a + A->i[i] + shift; 941d6dfbf8fSBarry Smith sum = b[i]; 942d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 943dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 944d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 945dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 946dbb450caSBarry Smith v = B->a + B->i[i] + shift; 947d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 94855a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 949d6dfbf8fSBarry Smith } 950d6dfbf8fSBarry Smith } 951d6dfbf8fSBarry Smith } 952d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 953da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 95438597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 955da3a660dSBarry Smith } 95643a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 95778b31e54SBarry Smith CHKERRQ(ierr); 95843a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 95978b31e54SBarry Smith CHKERRQ(ierr); 960d6dfbf8fSBarry Smith while (its--) { 961d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 962d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 963dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 964dbb450caSBarry Smith v = A->a + A->i[i] + shift; 965d6dfbf8fSBarry Smith sum = b[i]; 966d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 967dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 968d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 969dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 970dbb450caSBarry Smith v = B->a + B->i[i] + shift; 971d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 97255a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 973d6dfbf8fSBarry Smith } 974d6dfbf8fSBarry Smith } 975d6dfbf8fSBarry Smith } 976d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 977da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 97838597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 979da3a660dSBarry Smith } 98043a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 98178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 98243a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 98378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 984d6dfbf8fSBarry Smith while (its--) { 985d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 986d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 987dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 988dbb450caSBarry Smith v = A->a + A->i[i] + shift; 989d6dfbf8fSBarry Smith sum = b[i]; 990d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 991dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 992d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 993dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 994dbb450caSBarry Smith v = B->a + B->i[i] + shift; 995d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 99655a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 997d6dfbf8fSBarry Smith } 998d6dfbf8fSBarry Smith } 999d6dfbf8fSBarry Smith } 1000c16cb8f2SBarry Smith else { 1001e3372554SBarry Smith SETERRQ(1,0,"Option not supported"); 1002c16cb8f2SBarry Smith } 10038a729477SBarry Smith return 0; 10048a729477SBarry Smith } 1005a66be287SLois Curfman McInnes 10065615d1e5SSatish Balay #undef __FUNC__ 10075eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */ 10088f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1009a66be287SLois Curfman McInnes { 1010a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1011a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 10124e220ebcSLois Curfman McInnes int ierr; 10134e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 1014a66be287SLois Curfman McInnes 10154e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 10164e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 10174e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 10184e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 10194e220ebcSLois Curfman McInnes info->block_size = 1.0; 10204e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 10214e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 10224e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 10234e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 10244e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 10254e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 1026a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 10274e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 10284e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 10294e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 10304e220ebcSLois Curfman McInnes info->memory = isend[3]; 10314e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1032a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 10334e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 10344e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10354e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10364e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10374e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10384e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1039a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 10404e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 10414e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10424e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10434e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10444e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10454e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1046a66be287SLois Curfman McInnes } 10474e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 10484e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 10494e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 10504e220ebcSLois Curfman McInnes 1051a66be287SLois Curfman McInnes return 0; 1052a66be287SLois Curfman McInnes } 1053a66be287SLois Curfman McInnes 1054299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1055299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1056299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1057299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1058299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1059299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1060299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1061299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1062299609e3SLois Curfman McInnes 10635615d1e5SSatish Balay #undef __FUNC__ 10645eea60f9SBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */ 10658f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op) 1066c74985f6SBarry Smith { 1067c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1068c74985f6SBarry Smith 10696d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10706d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10716da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1072c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 1073c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1074b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1075b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1076b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1077aeafbbfcSLois Curfman McInnes a->roworiented = 1; 1078c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1079c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1080b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 10816da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 10826d4a8577SBarry Smith op == MAT_SYMMETRIC || 10836d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 10846d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 108594a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10866d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 10874b0e389bSBarry Smith a->roworiented = 0; 10884b0e389bSBarry Smith MatSetOption(a->A,op); 10894b0e389bSBarry Smith MatSetOption(a->B,op); 1090*2b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 109190f02eecSBarry Smith a->donotstash = 1; 109290f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1093e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1094c0bbcb79SLois Curfman McInnes else 1095e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1096c74985f6SBarry Smith return 0; 1097c74985f6SBarry Smith } 1098c74985f6SBarry Smith 10995615d1e5SSatish Balay #undef __FUNC__ 11005eea60f9SBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */ 11018f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1102c74985f6SBarry Smith { 110344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1104c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1105c74985f6SBarry Smith return 0; 1106c74985f6SBarry Smith } 1107c74985f6SBarry Smith 11085615d1e5SSatish Balay #undef __FUNC__ 11095eea60f9SBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */ 11108f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1111c74985f6SBarry Smith { 111244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1113b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1114c74985f6SBarry Smith return 0; 1115c74985f6SBarry Smith } 1116c74985f6SBarry Smith 11175615d1e5SSatish Balay #undef __FUNC__ 11185eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */ 11198f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1120c74985f6SBarry Smith { 112144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1122c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1123c74985f6SBarry Smith return 0; 1124c74985f6SBarry Smith } 1125c74985f6SBarry Smith 11266d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11276d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11286d84be18SBarry Smith 11295615d1e5SSatish Balay #undef __FUNC__ 11305615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 11316d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 113239e00950SLois Curfman McInnes { 1133154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 113470f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1135154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1136154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 113770f0671dSBarry Smith int *cmap, *idx_p; 113839e00950SLois Curfman McInnes 1139e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 11407a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 11417a0afa10SBarry Smith 114270f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 11437a0afa10SBarry Smith /* 11447a0afa10SBarry Smith allocate enough space to hold information from the longest row. 11457a0afa10SBarry Smith */ 11467a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1147c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1148c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 11497a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 11507a0afa10SBarry Smith if (max < tmp) { max = tmp; } 11517a0afa10SBarry Smith } 11527a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 11537a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 11547a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 11557a0afa10SBarry Smith } 11567a0afa10SBarry Smith 1157e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1158abc0e9e4SLois Curfman McInnes lrow = row - rstart; 115939e00950SLois Curfman McInnes 1160154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1161154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1162154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 116338597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 116438597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1165154123eaSLois Curfman McInnes nztot = nzA + nzB; 1166154123eaSLois Curfman McInnes 116770f0671dSBarry Smith cmap = mat->garray; 1168154123eaSLois Curfman McInnes if (v || idx) { 1169154123eaSLois Curfman McInnes if (nztot) { 1170154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 117170f0671dSBarry Smith int imark = -1; 1172154123eaSLois Curfman McInnes if (v) { 117370f0671dSBarry Smith *v = v_p = mat->rowvalues; 117439e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 117570f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1176154123eaSLois Curfman McInnes else break; 1177154123eaSLois Curfman McInnes } 1178154123eaSLois Curfman McInnes imark = i; 117970f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 118070f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1181154123eaSLois Curfman McInnes } 1182154123eaSLois Curfman McInnes if (idx) { 118370f0671dSBarry Smith *idx = idx_p = mat->rowindices; 118470f0671dSBarry Smith if (imark > -1) { 118570f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 118670f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 118770f0671dSBarry Smith } 118870f0671dSBarry Smith } else { 1189154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 119070f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1191154123eaSLois Curfman McInnes else break; 1192154123eaSLois Curfman McInnes } 1193154123eaSLois Curfman McInnes imark = i; 119470f0671dSBarry Smith } 119570f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 119670f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 119739e00950SLois Curfman McInnes } 119839e00950SLois Curfman McInnes } 11991ca473b0SSatish Balay else { 12001ca473b0SSatish Balay if (idx) *idx = 0; 12011ca473b0SSatish Balay if (v) *v = 0; 12021ca473b0SSatish Balay } 1203154123eaSLois Curfman McInnes } 120439e00950SLois Curfman McInnes *nz = nztot; 120538597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 120638597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 120739e00950SLois Curfman McInnes return 0; 120839e00950SLois Curfman McInnes } 120939e00950SLois Curfman McInnes 12105615d1e5SSatish Balay #undef __FUNC__ 12115eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */ 12126d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 121339e00950SLois Curfman McInnes { 12147a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 12157a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1216e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 12177a0afa10SBarry Smith } 12187a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 121939e00950SLois Curfman McInnes return 0; 122039e00950SLois Curfman McInnes } 122139e00950SLois Curfman McInnes 12225615d1e5SSatish Balay #undef __FUNC__ 12235615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 12248f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1225855ac2c5SLois Curfman McInnes { 1226855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1227ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1228416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1229416022c9SBarry Smith double sum = 0.0; 123004ca555eSLois Curfman McInnes Scalar *v; 123104ca555eSLois Curfman McInnes 123217699dbbSLois Curfman McInnes if (aij->size == 1) { 123314183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 123437fa93a5SLois Curfman McInnes } else { 123504ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 123604ca555eSLois Curfman McInnes v = amat->a; 123704ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 123804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 123904ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 124004ca555eSLois Curfman McInnes #else 124104ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 124204ca555eSLois Curfman McInnes #endif 124304ca555eSLois Curfman McInnes } 124404ca555eSLois Curfman McInnes v = bmat->a; 124504ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 124604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 124704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 124804ca555eSLois Curfman McInnes #else 124904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 125004ca555eSLois Curfman McInnes #endif 125104ca555eSLois Curfman McInnes } 12526d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 125304ca555eSLois Curfman McInnes *norm = sqrt(*norm); 125404ca555eSLois Curfman McInnes } 125504ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 125604ca555eSLois Curfman McInnes double *tmp, *tmp2; 125704ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 12580452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 12590452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1260cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 126104ca555eSLois Curfman McInnes *norm = 0.0; 126204ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 126304ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1264579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 126504ca555eSLois Curfman McInnes } 126604ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 126704ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1268579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 126904ca555eSLois Curfman McInnes } 12706d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 127104ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 127204ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 127304ca555eSLois Curfman McInnes } 12740452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 127504ca555eSLois Curfman McInnes } 127604ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1277515d9167SLois Curfman McInnes double ntemp = 0.0; 127804ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1279dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 128004ca555eSLois Curfman McInnes sum = 0.0; 128104ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1282cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 128304ca555eSLois Curfman McInnes } 1284dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 128504ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1286cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 128704ca555eSLois Curfman McInnes } 1288515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 128904ca555eSLois Curfman McInnes } 12906d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 129104ca555eSLois Curfman McInnes } 129204ca555eSLois Curfman McInnes else { 1293e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 129404ca555eSLois Curfman McInnes } 129537fa93a5SLois Curfman McInnes } 1296855ac2c5SLois Curfman McInnes return 0; 1297855ac2c5SLois Curfman McInnes } 1298855ac2c5SLois Curfman McInnes 12995615d1e5SSatish Balay #undef __FUNC__ 13005615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 13018f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1302b7c46309SBarry Smith { 1303b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1304dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1305416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1306416022c9SBarry Smith Mat B; 1307b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1308b7c46309SBarry Smith Scalar *array; 1309b7c46309SBarry Smith 13103638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 1311e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 1312b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1313b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1314b7c46309SBarry Smith 1315b7c46309SBarry Smith /* copy over the A part */ 1316ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1317b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1318b7c46309SBarry Smith row = a->rstart; 1319dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1320b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1321416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1322b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1323b7c46309SBarry Smith } 1324b7c46309SBarry Smith aj = Aloc->j; 13254af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1326b7c46309SBarry Smith 1327b7c46309SBarry Smith /* copy over the B part */ 1328ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1329b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1330b7c46309SBarry Smith row = a->rstart; 13310452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1332dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1333b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1334416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1335b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1336b7c46309SBarry Smith } 13370452661fSBarry Smith PetscFree(ct); 13386d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13396d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13403638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 13410de55854SLois Curfman McInnes *matout = B; 13420de55854SLois Curfman McInnes } else { 13430de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 13440452661fSBarry Smith PetscFree(a->rowners); 13450de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 13460de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 13470452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 13480452661fSBarry Smith if (a->garray) PetscFree(a->garray); 13490de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1350a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 13510452661fSBarry Smith PetscFree(a); 1352416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 13530452661fSBarry Smith PetscHeaderDestroy(B); 13540de55854SLois Curfman McInnes } 1355b7c46309SBarry Smith return 0; 1356b7c46309SBarry Smith } 1357b7c46309SBarry Smith 13585615d1e5SSatish Balay #undef __FUNC__ 13595615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 13604b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1361a008b906SSatish Balay { 13624b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13634b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1364a008b906SSatish Balay int ierr,s1,s2,s3; 1365a008b906SSatish Balay 13664b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 13674b967eb1SSatish Balay if (rr) { 13684b967eb1SSatish Balay s3 = aij->n; 13694b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 1370e3372554SBarry Smith if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 13714b967eb1SSatish Balay /* Overlap communication with computation. */ 137243a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1373a008b906SSatish Balay } 13744b967eb1SSatish Balay if (ll) { 13754b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 1376e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1377c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 13784b967eb1SSatish Balay } 13794b967eb1SSatish Balay /* scale the diagonal block */ 1380c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 13814b967eb1SSatish Balay 13824b967eb1SSatish Balay if (rr) { 13834b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 138443a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1385c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 13864b967eb1SSatish Balay } 13874b967eb1SSatish Balay 1388a008b906SSatish Balay return 0; 1389a008b906SSatish Balay } 1390a008b906SSatish Balay 1391a008b906SSatish Balay 1392682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 13935615d1e5SSatish Balay #undef __FUNC__ 13945eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */ 13958f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A) 1396682d7d0cSBarry Smith { 1397682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1398682d7d0cSBarry Smith 1399682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1400682d7d0cSBarry Smith else return 0; 1401682d7d0cSBarry Smith } 1402682d7d0cSBarry Smith 14035615d1e5SSatish Balay #undef __FUNC__ 14045eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */ 14058f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 14065a838052SSatish Balay { 14075a838052SSatish Balay *bs = 1; 14085a838052SSatish Balay return 0; 14095a838052SSatish Balay } 14105615d1e5SSatish Balay #undef __FUNC__ 14115eea60f9SBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */ 14128f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A) 1413bb5a7306SBarry Smith { 1414bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1415bb5a7306SBarry Smith int ierr; 1416bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1417bb5a7306SBarry Smith return 0; 1418bb5a7306SBarry Smith } 1419bb5a7306SBarry Smith 14208f6be9afSLois Curfman McInnes extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 14212f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 14220a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 14230a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 14248a729477SBarry Smith /* -------------------------------------------------------------------*/ 14252ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 142639e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 142744a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 142844a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 142936ce4990SBarry Smith 0,0, 143036ce4990SBarry Smith 0,0, 143136ce4990SBarry Smith 0,0, 143244a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1433b7c46309SBarry Smith MatTranspose_MPIAIJ, 1434a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1435a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1436ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 14371eb62cbbSBarry Smith 0, 1438299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 143936ce4990SBarry Smith 0,0,0,0, 1440d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 144136ce4990SBarry Smith 0,0, 144294a9d846SBarry Smith 0,0,MatConvertSameType_MPIAIJ,0,0, 1443b49de8d1SLois Curfman McInnes 0,0,0, 1444598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1445052efed2SBarry Smith MatPrintHelp_MPIAIJ, 14463b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 14470a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 1448bb5a7306SBarry Smith MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 144936ce4990SBarry Smith 14508a729477SBarry Smith 14515615d1e5SSatish Balay #undef __FUNC__ 14525615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 14531987afe7SBarry Smith /*@C 1454ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 14553a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 14563a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 14573a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 14583a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 14598a729477SBarry Smith 14608a729477SBarry Smith Input Parameters: 14611eb62cbbSBarry Smith . comm - MPI communicator 14627d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 146392e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 146492e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 146592e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 146692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 146792e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 14687d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 146992e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1470ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1471ff756334SLois Curfman McInnes (same for all local rows) 14722bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 147392e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 14742bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 14752bd5e0b2SLois Curfman McInnes it is zero. 14762bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1477ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 14782bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 14792bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 14802bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 14818a729477SBarry Smith 1482ff756334SLois Curfman McInnes Output Parameter: 148344cd7ae7SLois Curfman McInnes . A - the matrix 14848a729477SBarry Smith 1485ff756334SLois Curfman McInnes Notes: 1486ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1487ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 14880002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 14890002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1490ff756334SLois Curfman McInnes 1491ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1492ff756334SLois Curfman McInnes (possibly both). 1493ff756334SLois Curfman McInnes 14945511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 14955511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 14965511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 14975511cfe3SLois Curfman McInnes 14985511cfe3SLois Curfman McInnes Options Database Keys: 14995511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 15006e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 15016e62573dSLois Curfman McInnes $ (max limit=5) 15026e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 15036e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 15046e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 15055511cfe3SLois Curfman McInnes 1506e0245417SLois Curfman McInnes Storage Information: 1507e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1508e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1509e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1510e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1511e0245417SLois Curfman McInnes 1512e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 15135ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 15145ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 15155ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 15165ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1517ff756334SLois Curfman McInnes 15185511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 15195511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 15202191d07cSBarry Smith 1521b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1522b810aeb4SBarry Smith $ ------------------- 15235511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 15245511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 15255511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 15265511cfe3SLois Curfman McInnes $ ------------------- 1527b810aeb4SBarry Smith $ 1528b810aeb4SBarry Smith 15295511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 15305511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 15315511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 15325511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 15335511cfe3SLois Curfman McInnes 15345511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 15355511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 15365511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 15373d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 153892e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15396da5968aSLois Curfman McInnes matrices. 15403a511b96SLois Curfman McInnes 1541dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1542ff756334SLois Curfman McInnes 1543fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 15448a729477SBarry Smith @*/ 15451eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 154644cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 15478a729477SBarry Smith { 154844cd7ae7SLois Curfman McInnes Mat B; 154944cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 155036ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1551416022c9SBarry Smith 155244cd7ae7SLois Curfman McInnes *A = 0; 155344cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 155444cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 155544cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 155644cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 155744cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 155836ce4990SBarry Smith MPI_Comm_size(comm,&size); 155936ce4990SBarry Smith if (size == 1) { 156036ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 156136ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 156236ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 156336ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 156436ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 156536ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 156636ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 156736ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 156836ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 156936ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 157036ce4990SBarry Smith } 157144cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 157244cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 157344cd7ae7SLois Curfman McInnes B->factor = 0; 157444cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 157590f02eecSBarry Smith B->mapping = 0; 1576d6dfbf8fSBarry Smith 157747794344SBarry Smith B->insertmode = NOT_SET_VALUES; 157844cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 157944cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 15801eb62cbbSBarry Smith 1581b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1582e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 15831987afe7SBarry Smith 1584dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 15851eb62cbbSBarry Smith work[0] = m; work[1] = n; 1586d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1587dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1588dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 15891eb62cbbSBarry Smith } 159044cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 159144cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 159244cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 159344cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 159444cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 159544cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 15961eb62cbbSBarry Smith 15971eb62cbbSBarry Smith /* build local table of row and column ownerships */ 159844cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 159944cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1600603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 160144cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 160244cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 160344cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 160444cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 16058a729477SBarry Smith } 160644cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 160744cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 160844cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 160944cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 161044cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 161144cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 16128a729477SBarry Smith } 161344cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 161444cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 16158a729477SBarry Smith 16165ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 161744cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 161844cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 16197b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 162044cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 162144cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 16228a729477SBarry Smith 16231eb62cbbSBarry Smith /* build cache for off array entries formed */ 162444cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 162590f02eecSBarry Smith b->donotstash = 0; 162644cd7ae7SLois Curfman McInnes b->colmap = 0; 162744cd7ae7SLois Curfman McInnes b->garray = 0; 162844cd7ae7SLois Curfman McInnes b->roworiented = 1; 16298a729477SBarry Smith 16301eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 163144cd7ae7SLois Curfman McInnes b->lvec = 0; 163244cd7ae7SLois Curfman McInnes b->Mvctx = 0; 16338a729477SBarry Smith 16347a0afa10SBarry Smith /* stuff for MatGetRow() */ 163544cd7ae7SLois Curfman McInnes b->rowindices = 0; 163644cd7ae7SLois Curfman McInnes b->rowvalues = 0; 163744cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 16387a0afa10SBarry Smith 163944cd7ae7SLois Curfman McInnes *A = B; 1640d6dfbf8fSBarry Smith return 0; 1641d6dfbf8fSBarry Smith } 1642c74985f6SBarry Smith 16435615d1e5SSatish Balay #undef __FUNC__ 16445615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ" 16458f6be9afSLois Curfman McInnes int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1646d6dfbf8fSBarry Smith { 1647d6dfbf8fSBarry Smith Mat mat; 1648416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1649a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1650d6dfbf8fSBarry Smith 1651416022c9SBarry Smith *newmat = 0; 16520452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1653a5a9c739SBarry Smith PLogObjectCreate(mat); 16540452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1655416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 165644a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 165744a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1658d6dfbf8fSBarry Smith mat->factor = matin->factor; 1659c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1660d6dfbf8fSBarry Smith 166144cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 166244cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 166344cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 166444cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1665d6dfbf8fSBarry Smith 1666416022c9SBarry Smith a->rstart = oldmat->rstart; 1667416022c9SBarry Smith a->rend = oldmat->rend; 1668416022c9SBarry Smith a->cstart = oldmat->cstart; 1669416022c9SBarry Smith a->cend = oldmat->cend; 167017699dbbSLois Curfman McInnes a->size = oldmat->size; 167117699dbbSLois Curfman McInnes a->rank = oldmat->rank; 167247794344SBarry Smith mat->insertmode = NOT_SET_VALUES; 1673bcd2baecSBarry Smith a->rowvalues = 0; 1674bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1675d6dfbf8fSBarry Smith 1676603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1677603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1678603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1679603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1680416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 16812ee70a88SLois Curfman McInnes if (oldmat->colmap) { 16820452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1683416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1684416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1685416022c9SBarry Smith } else a->colmap = 0; 16863f41c07dSBarry Smith if (oldmat->garray) { 16873f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 16883f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1689464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 16903f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1691416022c9SBarry Smith } else a->garray = 0; 1692d6dfbf8fSBarry Smith 1693416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1694416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1695a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1696416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1697416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1698416022c9SBarry Smith PLogObjectParent(mat,a->A); 1699416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1700416022c9SBarry Smith PLogObjectParent(mat,a->B); 17015dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 170225cdf11fSBarry Smith if (flg) { 1703682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1704682d7d0cSBarry Smith } 17058a729477SBarry Smith *newmat = mat; 17068a729477SBarry Smith return 0; 17078a729477SBarry Smith } 1708416022c9SBarry Smith 170977c4ece6SBarry Smith #include "sys.h" 1710416022c9SBarry Smith 17115615d1e5SSatish Balay #undef __FUNC__ 17125615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 171319bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1714416022c9SBarry Smith { 1715d65a2f8fSBarry Smith Mat A; 1716416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1717d65a2f8fSBarry Smith Scalar *vals,*svals; 171819bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1719416022c9SBarry Smith MPI_Status status; 172017699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1721d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 172219bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1723416022c9SBarry Smith 172417699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 172517699dbbSLois Curfman McInnes if (!rank) { 172690ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 172777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1728e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1729416022c9SBarry Smith } 1730416022c9SBarry Smith 1731416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1732416022c9SBarry Smith M = header[1]; N = header[2]; 1733416022c9SBarry Smith /* determine ownership of all rows */ 173417699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 17350452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1736416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1737416022c9SBarry Smith rowners[0] = 0; 173817699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1739416022c9SBarry Smith rowners[i] += rowners[i-1]; 1740416022c9SBarry Smith } 174117699dbbSLois Curfman McInnes rstart = rowners[rank]; 174217699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1743416022c9SBarry Smith 1744416022c9SBarry Smith /* distribute row lengths to all processors */ 17450452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1746416022c9SBarry Smith offlens = ourlens + (rend-rstart); 174717699dbbSLois Curfman McInnes if (!rank) { 17480452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 174977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 17500452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 175117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1752d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 17530452661fSBarry Smith PetscFree(sndcounts); 1754416022c9SBarry Smith } 1755416022c9SBarry Smith else { 1756416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1757416022c9SBarry Smith } 1758416022c9SBarry Smith 175917699dbbSLois Curfman McInnes if (!rank) { 1760416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 17610452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1762cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 176317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1764416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1765416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1766416022c9SBarry Smith } 1767416022c9SBarry Smith } 17680452661fSBarry Smith PetscFree(rowlengths); 1769416022c9SBarry Smith 1770416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1771416022c9SBarry Smith maxnz = 0; 177217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 17730452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1774416022c9SBarry Smith } 17750452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1776416022c9SBarry Smith 1777416022c9SBarry Smith /* read in my part of the matrix column indices */ 1778416022c9SBarry Smith nz = procsnz[0]; 17790452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 178077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1781d65a2f8fSBarry Smith 1782d65a2f8fSBarry Smith /* read in every one elses and ship off */ 178317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1784d65a2f8fSBarry Smith nz = procsnz[i]; 178577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1786d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1787d65a2f8fSBarry Smith } 17880452661fSBarry Smith PetscFree(cols); 1789416022c9SBarry Smith } 1790416022c9SBarry Smith else { 1791416022c9SBarry Smith /* determine buffer space needed for message */ 1792416022c9SBarry Smith nz = 0; 1793416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1794416022c9SBarry Smith nz += ourlens[i]; 1795416022c9SBarry Smith } 17960452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1797416022c9SBarry Smith 1798416022c9SBarry Smith /* receive message of column indices*/ 1799d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1800416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1801e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1802416022c9SBarry Smith } 1803416022c9SBarry Smith 1804416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1805cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1806416022c9SBarry Smith jj = 0; 1807416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1808416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1809d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1810416022c9SBarry Smith jj++; 1811416022c9SBarry Smith } 1812416022c9SBarry Smith } 1813d65a2f8fSBarry Smith 1814d65a2f8fSBarry Smith /* create our matrix */ 1815416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1816416022c9SBarry Smith ourlens[i] -= offlens[i]; 1817416022c9SBarry Smith } 1818d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1819d65a2f8fSBarry Smith A = *newmat; 18206d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1821d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1822d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1823d65a2f8fSBarry Smith } 1824416022c9SBarry Smith 182517699dbbSLois Curfman McInnes if (!rank) { 18260452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1827416022c9SBarry Smith 1828416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1829416022c9SBarry Smith nz = procsnz[0]; 183077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1831d65a2f8fSBarry Smith 1832d65a2f8fSBarry Smith /* insert into matrix */ 1833d65a2f8fSBarry Smith jj = rstart; 1834d65a2f8fSBarry Smith smycols = mycols; 1835d65a2f8fSBarry Smith svals = vals; 1836d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1837d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1838d65a2f8fSBarry Smith smycols += ourlens[i]; 1839d65a2f8fSBarry Smith svals += ourlens[i]; 1840d65a2f8fSBarry Smith jj++; 1841416022c9SBarry Smith } 1842416022c9SBarry Smith 1843d65a2f8fSBarry Smith /* read in other processors and ship out */ 184417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1845416022c9SBarry Smith nz = procsnz[i]; 184677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1847d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1848416022c9SBarry Smith } 18490452661fSBarry Smith PetscFree(procsnz); 1850416022c9SBarry Smith } 1851d65a2f8fSBarry Smith else { 1852d65a2f8fSBarry Smith /* receive numeric values */ 18530452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1854416022c9SBarry Smith 1855d65a2f8fSBarry Smith /* receive message of values*/ 1856d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1857d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1858e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1859d65a2f8fSBarry Smith 1860d65a2f8fSBarry Smith /* insert into matrix */ 1861d65a2f8fSBarry Smith jj = rstart; 1862d65a2f8fSBarry Smith smycols = mycols; 1863d65a2f8fSBarry Smith svals = vals; 1864d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1865d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1866d65a2f8fSBarry Smith smycols += ourlens[i]; 1867d65a2f8fSBarry Smith svals += ourlens[i]; 1868d65a2f8fSBarry Smith jj++; 1869d65a2f8fSBarry Smith } 1870d65a2f8fSBarry Smith } 18710452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1872d65a2f8fSBarry Smith 18736d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 18746d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1875416022c9SBarry Smith return 0; 1876416022c9SBarry Smith } 1877