1cb512458SBarry Smith #ifndef lint 2*96854ed6SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.201 1997/04/10 00:02:50 bsmith Exp curfman $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 8d9942c19SSatish Balay #include "src/inline/spops.h" 98a729477SBarry Smith 109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 129e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 139e25ed09SBarry Smith length of colmap equals the global matrix length. 149e25ed09SBarry Smith */ 155615d1e5SSatish Balay #undef __FUNC__ 165eea60f9SBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */ 170a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 189e25ed09SBarry Smith { 1944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21905e6a2fSBarry Smith int n = B->n,i; 22dbb450caSBarry Smith 23758f045eSSatish Balay aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 24464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 25cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 279e25ed09SBarry Smith return 0; 289e25ed09SBarry Smith } 299e25ed09SBarry Smith 302493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 312493cbb0SBarry Smith 325615d1e5SSatish Balay #undef __FUNC__ 335615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ" 348f6be9afSLois Curfman McInnes int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 353b2fbd54SBarry Smith PetscTruth *done) 36299609e3SLois Curfman McInnes { 37299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 38299609e3SLois Curfman McInnes int ierr; 3917699dbbSLois Curfman McInnes if (aij->size == 1) { 403b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 41e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 423b2fbd54SBarry Smith return 0; 433b2fbd54SBarry Smith } 443b2fbd54SBarry Smith 455615d1e5SSatish Balay #undef __FUNC__ 465eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */ 478f6be9afSLois Curfman McInnes int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 483b2fbd54SBarry Smith PetscTruth *done) 493b2fbd54SBarry Smith { 503b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 513b2fbd54SBarry Smith int ierr; 523b2fbd54SBarry Smith if (aij->size == 1) { 533b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 55299609e3SLois Curfman McInnes return 0; 56299609e3SLois Curfman McInnes } 57299609e3SLois Curfman McInnes 580520107fSSatish Balay #define CHUNKSIZE 15 5930770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 600520107fSSatish Balay { \ 610520107fSSatish Balay \ 620520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 6330770e4dSSatish Balay rmax = aimax[row]; nrow = ailen[row]; \ 64f5e9677aSSatish Balay col1 = col - shift; \ 65f5e9677aSSatish Balay \ 660520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 67f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 68f5e9677aSSatish Balay if (rp[_i] == col1) { \ 690520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 700520107fSSatish Balay else ap[_i] = value; \ 7130770e4dSSatish Balay goto a_noinsert; \ 720520107fSSatish Balay } \ 730520107fSSatish Balay } \ 7430770e4dSSatish Balay if (nonew) goto a_noinsert; \ 750520107fSSatish Balay if (nrow >= rmax) { \ 760520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 770520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 780520107fSSatish Balay Scalar *new_a; \ 790520107fSSatish Balay \ 800520107fSSatish Balay /* malloc new storage space */ \ 810520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 820520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 830520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 840520107fSSatish Balay new_i = new_j + new_nz; \ 850520107fSSatish Balay \ 860520107fSSatish Balay /* copy over old data into new slots */ \ 870520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 880520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 890520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 900520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 910520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 920520107fSSatish Balay len*sizeof(int)); \ 930520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 940520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 950520107fSSatish Balay len*sizeof(Scalar)); \ 960520107fSSatish Balay /* free up old matrix storage */ \ 97f5e9677aSSatish Balay \ 980520107fSSatish Balay PetscFree(a->a); \ 990520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 1000520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1010520107fSSatish Balay a->singlemalloc = 1; \ 1020520107fSSatish Balay \ 1030520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 10430770e4dSSatish Balay rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 1050520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 1060520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 1070520107fSSatish Balay a->reallocs++; \ 1080520107fSSatish Balay } \ 1090520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 1100520107fSSatish Balay /* shift up all the later entries in this row */ \ 1110520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 1120520107fSSatish Balay rp[ii+1] = rp[ii]; \ 1130520107fSSatish Balay ap[ii+1] = ap[ii]; \ 1140520107fSSatish Balay } \ 115f5e9677aSSatish Balay rp[_i] = col1; \ 1160520107fSSatish Balay ap[_i] = value; \ 11730770e4dSSatish Balay a_noinsert: ; \ 1180520107fSSatish Balay ailen[row] = nrow; \ 1190520107fSSatish Balay } 1200a198c4cSBarry Smith 12130770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 12230770e4dSSatish Balay { \ 12330770e4dSSatish Balay \ 12430770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 12530770e4dSSatish Balay rmax = bimax[row]; nrow = bilen[row]; \ 12630770e4dSSatish Balay col1 = col - shift; \ 12730770e4dSSatish Balay \ 12830770e4dSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 12930770e4dSSatish Balay if (rp[_i] > col1) break; \ 13030770e4dSSatish Balay if (rp[_i] == col1) { \ 13130770e4dSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 13230770e4dSSatish Balay else ap[_i] = value; \ 13330770e4dSSatish Balay goto b_noinsert; \ 13430770e4dSSatish Balay } \ 13530770e4dSSatish Balay } \ 13630770e4dSSatish Balay if (nonew) goto b_noinsert; \ 13730770e4dSSatish Balay if (nrow >= rmax) { \ 13830770e4dSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 13974c639caSSatish Balay int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 14030770e4dSSatish Balay Scalar *new_a; \ 14130770e4dSSatish Balay \ 14230770e4dSSatish Balay /* malloc new storage space */ \ 14374c639caSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 14430770e4dSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 14530770e4dSSatish Balay new_j = (int *) (new_a + new_nz); \ 14630770e4dSSatish Balay new_i = new_j + new_nz; \ 14730770e4dSSatish Balay \ 14830770e4dSSatish Balay /* copy over old data into new slots */ \ 14930770e4dSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 15074c639caSSatish Balay for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 15130770e4dSSatish Balay PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 15230770e4dSSatish Balay len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 15330770e4dSSatish Balay PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 15430770e4dSSatish Balay len*sizeof(int)); \ 15530770e4dSSatish Balay PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 15630770e4dSSatish Balay PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 15730770e4dSSatish Balay len*sizeof(Scalar)); \ 15830770e4dSSatish Balay /* free up old matrix storage */ \ 15930770e4dSSatish Balay \ 16074c639caSSatish Balay PetscFree(b->a); \ 16174c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 16274c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 16374c639caSSatish Balay b->singlemalloc = 1; \ 16430770e4dSSatish Balay \ 16530770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 16630770e4dSSatish Balay rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 16774c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 16874c639caSSatish Balay b->maxnz += CHUNKSIZE; \ 16974c639caSSatish Balay b->reallocs++; \ 17030770e4dSSatish Balay } \ 17174c639caSSatish Balay N = nrow++ - 1; b->nz++; \ 17230770e4dSSatish Balay /* shift up all the later entries in this row */ \ 17330770e4dSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 17430770e4dSSatish Balay rp[ii+1] = rp[ii]; \ 17530770e4dSSatish Balay ap[ii+1] = ap[ii]; \ 17630770e4dSSatish Balay } \ 17730770e4dSSatish Balay rp[_i] = col1; \ 17830770e4dSSatish Balay ap[_i] = value; \ 17930770e4dSSatish Balay b_noinsert: ; \ 18030770e4dSSatish Balay bilen[row] = nrow; \ 18130770e4dSSatish Balay } 18230770e4dSSatish Balay 1830520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1845615d1e5SSatish Balay #undef __FUNC__ 1855615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 1868f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 1878a729477SBarry Smith { 18844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1894b0e389bSBarry Smith Scalar value; 1901eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 1911eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 192905e6a2fSBarry Smith int roworiented = aij->roworiented; 1938a729477SBarry Smith 1940520107fSSatish Balay /* Some Variables required in the macro */ 1954ee7247eSSatish Balay Mat A = aij->A; 1964ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 19730770e4dSSatish Balay int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 19830770e4dSSatish Balay Scalar *aa = a->a; 19930770e4dSSatish Balay 20030770e4dSSatish Balay Mat B = aij->B; 20130770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 20230770e4dSSatish Balay int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 20330770e4dSSatish Balay Scalar *ba = b->a; 20430770e4dSSatish Balay 2054ee7247eSSatish Balay int *rp,ii,nrow,_i,rmax, N, col1; 20630770e4dSSatish Balay int nonew = a->nonew,shift = a->indexshift; 20730770e4dSSatish Balay Scalar *ap; 2084ee7247eSSatish Balay 2098a729477SBarry Smith for ( i=0; i<m; i++ ) { 2100a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 211e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 212e3372554SBarry Smith if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 2130a198c4cSBarry Smith #endif 2144b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 2154b0e389bSBarry Smith row = im[i] - rstart; 2161eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 2174b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 2184b0e389bSBarry Smith col = in[j] - cstart; 2194b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 22030770e4dSSatish Balay MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 2210520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2221eb62cbbSBarry Smith } 2230a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 224e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 225e3372554SBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 2260a198c4cSBarry Smith #endif 2271eb62cbbSBarry Smith else { 228227d817aSBarry Smith if (mat->was_assembled) { 229905e6a2fSBarry Smith if (!aij->colmap) { 230905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 231905e6a2fSBarry Smith } 232905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 233ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2342493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2354b0e389bSBarry Smith col = in[j]; 236d6dfbf8fSBarry Smith } 2379e25ed09SBarry Smith } 2384b0e389bSBarry Smith else col = in[j]; 2394b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 24030770e4dSSatish Balay MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 24130770e4dSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2421eb62cbbSBarry Smith } 2431eb62cbbSBarry Smith } 2441eb62cbbSBarry Smith } 2451eb62cbbSBarry Smith else { 24690f02eecSBarry Smith if (roworiented && !aij->donotstash) { 2474b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 2484b0e389bSBarry Smith } 2494b0e389bSBarry Smith else { 25090f02eecSBarry Smith if (!aij->donotstash) { 2514b0e389bSBarry Smith row = im[i]; 2524b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 2534b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 2544b0e389bSBarry Smith } 2554b0e389bSBarry Smith } 2561eb62cbbSBarry Smith } 2578a729477SBarry Smith } 25890f02eecSBarry Smith } 2598a729477SBarry Smith return 0; 2608a729477SBarry Smith } 2618a729477SBarry Smith 2625615d1e5SSatish Balay #undef __FUNC__ 2635615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 2648f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 265b49de8d1SLois Curfman McInnes { 266b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 267b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 268b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 269b49de8d1SLois Curfman McInnes 270b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 271e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 272e3372554SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 273b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 274b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 275b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 276e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 277e3372554SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 278b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 279b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 280b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281b49de8d1SLois Curfman McInnes } 282b49de8d1SLois Curfman McInnes else { 283905e6a2fSBarry Smith if (!aij->colmap) { 284905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285905e6a2fSBarry Smith } 286905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 287e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288d9d09a02SSatish Balay else { 289b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290b49de8d1SLois Curfman McInnes } 291b49de8d1SLois Curfman McInnes } 292b49de8d1SLois Curfman McInnes } 293d9d09a02SSatish Balay } 294b49de8d1SLois Curfman McInnes else { 295e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 296b49de8d1SLois Curfman McInnes } 297b49de8d1SLois Curfman McInnes } 298b49de8d1SLois Curfman McInnes return 0; 299b49de8d1SLois Curfman McInnes } 300b49de8d1SLois Curfman McInnes 3015615d1e5SSatish Balay #undef __FUNC__ 3025615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 3038f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 3048a729477SBarry Smith { 30544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 30717699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 30817699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 3091eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3106abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 3111eb62cbbSBarry Smith InsertMode addv; 3121eb62cbbSBarry Smith Scalar *rvalues,*svalues; 3131eb62cbbSBarry Smith 3141eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 31547794344SBarry Smith MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 316dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 317e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 3181eb62cbbSBarry Smith } 31947794344SBarry Smith mat->insertmode = addv; /* in case this processor had no cache */ 3201eb62cbbSBarry Smith 3211eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3220452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3240452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 3251eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3261eb62cbbSBarry Smith idx = aij->stash.idx[i]; 32717699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3281eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3291eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 3308a729477SBarry Smith } 3318a729477SBarry Smith } 3328a729477SBarry Smith } 33317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3341eb62cbbSBarry Smith 3351eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3360452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 33717699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 33817699dbbSLois Curfman McInnes nreceives = work[rank]; 33917699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 34017699dbbSLois Curfman McInnes nmax = work[rank]; 3410452661fSBarry Smith PetscFree(work); 3421eb62cbbSBarry Smith 3431eb62cbbSBarry Smith /* post receives: 3441eb62cbbSBarry Smith 1) each message will consist of ordered pairs 3451eb62cbbSBarry Smith (global index,value) we store the global index as a double 346d6dfbf8fSBarry Smith to simplify the message passing. 3471eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 3481eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 3491eb62cbbSBarry Smith this is a lot of wasted space. 3501eb62cbbSBarry Smith 3511eb62cbbSBarry Smith 3521eb62cbbSBarry Smith This could be done better. 3531eb62cbbSBarry Smith */ 3540452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 35578b31e54SBarry Smith CHKPTRQ(rvalues); 3560452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 35778b31e54SBarry Smith CHKPTRQ(recv_waits); 3581eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 359416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 3601eb62cbbSBarry Smith comm,recv_waits+i); 3611eb62cbbSBarry Smith } 3621eb62cbbSBarry Smith 3631eb62cbbSBarry Smith /* do sends: 3641eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3651eb62cbbSBarry Smith the ith processor 3661eb62cbbSBarry Smith */ 3670452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 3680452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36978b31e54SBarry Smith CHKPTRQ(send_waits); 3700452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3711eb62cbbSBarry Smith starts[0] = 0; 37217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3731eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3741eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 3751eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 3761eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 3771eb62cbbSBarry Smith } 3780452661fSBarry Smith PetscFree(owner); 3791eb62cbbSBarry Smith starts[0] = 0; 38017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3811eb62cbbSBarry Smith count = 0; 38217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3831eb62cbbSBarry Smith if (procs[i]) { 384416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 3851eb62cbbSBarry Smith comm,send_waits+count++); 3861eb62cbbSBarry Smith } 3871eb62cbbSBarry Smith } 3880452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 3891eb62cbbSBarry Smith 3901eb62cbbSBarry Smith /* Free cache space */ 39155908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 39278b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 3931eb62cbbSBarry Smith 3941eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 3951eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 3961eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 3971eb62cbbSBarry Smith aij->rmax = nmax; 3981eb62cbbSBarry Smith 3991eb62cbbSBarry Smith return 0; 4001eb62cbbSBarry Smith } 40144a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 4021eb62cbbSBarry Smith 4035615d1e5SSatish Balay #undef __FUNC__ 4045615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 4058f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 4061eb62cbbSBarry Smith { 40744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4081eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 409416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 410905e6a2fSBarry Smith int row,col,other_disassembled; 4111eb62cbbSBarry Smith Scalar *values,val; 41247794344SBarry Smith InsertMode addv = mat->insertmode; 4131eb62cbbSBarry Smith 4141eb62cbbSBarry Smith /* wait on receives */ 4151eb62cbbSBarry Smith while (count) { 416d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 4171eb62cbbSBarry Smith /* unpack receives into our local space */ 418d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 419c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 4201eb62cbbSBarry Smith n = n/3; 4211eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 422227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 423227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 4241eb62cbbSBarry Smith val = values[3*i+2]; 4251eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 4261eb62cbbSBarry Smith col -= aij->cstart; 4271eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 4281eb62cbbSBarry Smith } 4291eb62cbbSBarry Smith else { 43055a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 431905e6a2fSBarry Smith if (!aij->colmap) { 432905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 433905e6a2fSBarry Smith } 434905e6a2fSBarry Smith col = aij->colmap[col] - 1; 435ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4362493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 437227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 438d6dfbf8fSBarry Smith } 4399e25ed09SBarry Smith } 4401eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 4411eb62cbbSBarry Smith } 4421eb62cbbSBarry Smith } 4431eb62cbbSBarry Smith count--; 4441eb62cbbSBarry Smith } 4450452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 4461eb62cbbSBarry Smith 4471eb62cbbSBarry Smith /* wait on sends */ 4481eb62cbbSBarry Smith if (aij->nsends) { 4490a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 4501eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 4510452661fSBarry Smith PetscFree(send_status); 4521eb62cbbSBarry Smith } 4530452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 4541eb62cbbSBarry Smith 45578b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 45678b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 4571eb62cbbSBarry Smith 4582493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 4592493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 460227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 461227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 4622493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 4632493cbb0SBarry Smith } 4642493cbb0SBarry Smith 4656d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 46678b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 4675e42470aSBarry Smith } 46878b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 46978b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 4705e42470aSBarry Smith 4717a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 4728a729477SBarry Smith return 0; 4738a729477SBarry Smith } 4748a729477SBarry Smith 4755615d1e5SSatish Balay #undef __FUNC__ 4765615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 4778f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A) 4781eb62cbbSBarry Smith { 47944a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 480dbd7a890SLois Curfman McInnes int ierr; 48178b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 48278b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 4831eb62cbbSBarry Smith return 0; 4841eb62cbbSBarry Smith } 4851eb62cbbSBarry Smith 4861eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 4871eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 4881eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 4891eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 4901eb62cbbSBarry Smith routine. 4911eb62cbbSBarry Smith */ 4925615d1e5SSatish Balay #undef __FUNC__ 4935615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 4948f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 4951eb62cbbSBarry Smith { 49644a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 49717699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 4986abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 49917699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 5005392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 501d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 502d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 5031eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 5041eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 5051eb62cbbSBarry Smith IS istmp; 5061eb62cbbSBarry Smith 50777c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 50878b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 5091eb62cbbSBarry Smith 5101eb62cbbSBarry Smith /* first count number of contributors to each processor */ 5110452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 512cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 5130452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 5141eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5151eb62cbbSBarry Smith idx = rows[i]; 5161eb62cbbSBarry Smith found = 0; 51717699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 5181eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 5191eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 5201eb62cbbSBarry Smith } 5211eb62cbbSBarry Smith } 522e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 5231eb62cbbSBarry Smith } 52417699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 5251eb62cbbSBarry Smith 5261eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 5270452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 52817699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 52917699dbbSLois Curfman McInnes nrecvs = work[rank]; 53017699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 53117699dbbSLois Curfman McInnes nmax = work[rank]; 5320452661fSBarry Smith PetscFree(work); 5331eb62cbbSBarry Smith 5341eb62cbbSBarry Smith /* post receives: */ 5350452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 53678b31e54SBarry Smith CHKPTRQ(rvalues); 5370452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 53878b31e54SBarry Smith CHKPTRQ(recv_waits); 5391eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 540416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 5411eb62cbbSBarry Smith } 5421eb62cbbSBarry Smith 5431eb62cbbSBarry Smith /* do sends: 5441eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 5451eb62cbbSBarry Smith the ith processor 5461eb62cbbSBarry Smith */ 5470452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 5480452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 54978b31e54SBarry Smith CHKPTRQ(send_waits); 5500452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 5511eb62cbbSBarry Smith starts[0] = 0; 55217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5531eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5541eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 5551eb62cbbSBarry Smith } 5561eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 5571eb62cbbSBarry Smith 5581eb62cbbSBarry Smith starts[0] = 0; 55917699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5601eb62cbbSBarry Smith count = 0; 56117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 5621eb62cbbSBarry Smith if (procs[i]) { 563416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 5641eb62cbbSBarry Smith } 5651eb62cbbSBarry Smith } 5660452661fSBarry Smith PetscFree(starts); 5671eb62cbbSBarry Smith 56817699dbbSLois Curfman McInnes base = owners[rank]; 5691eb62cbbSBarry Smith 5701eb62cbbSBarry Smith /* wait on receives */ 5710452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 5721eb62cbbSBarry Smith source = lens + nrecvs; 5731eb62cbbSBarry Smith count = nrecvs; slen = 0; 5741eb62cbbSBarry Smith while (count) { 575d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 5761eb62cbbSBarry Smith /* unpack receives into our local space */ 5771eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 578d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 579d6dfbf8fSBarry Smith lens[imdex] = n; 5801eb62cbbSBarry Smith slen += n; 5811eb62cbbSBarry Smith count--; 5821eb62cbbSBarry Smith } 5830452661fSBarry Smith PetscFree(recv_waits); 5841eb62cbbSBarry Smith 5851eb62cbbSBarry Smith /* move the data into the send scatter */ 5860452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 5871eb62cbbSBarry Smith count = 0; 5881eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 5891eb62cbbSBarry Smith values = rvalues + i*nmax; 5901eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 5911eb62cbbSBarry Smith lrows[count++] = values[j] - base; 5921eb62cbbSBarry Smith } 5931eb62cbbSBarry Smith } 5940452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 5950452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 5961eb62cbbSBarry Smith 5971eb62cbbSBarry Smith /* actually zap the local rows */ 598029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 599464493b3SBarry Smith PLogObjectParent(A,istmp); 6000452661fSBarry Smith PetscFree(lrows); 60178b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 60278b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 60378b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 6041eb62cbbSBarry Smith 6051eb62cbbSBarry Smith /* wait on sends */ 6061eb62cbbSBarry Smith if (nsends) { 6070452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 60878b31e54SBarry Smith CHKPTRQ(send_status); 6091eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 6100452661fSBarry Smith PetscFree(send_status); 6111eb62cbbSBarry Smith } 6120452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 6131eb62cbbSBarry Smith 6141eb62cbbSBarry Smith return 0; 6151eb62cbbSBarry Smith } 6161eb62cbbSBarry Smith 6175615d1e5SSatish Balay #undef __FUNC__ 6185615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 6198f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 6201eb62cbbSBarry Smith { 621416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 622fbd6ef76SBarry Smith int ierr,nt; 623416022c9SBarry Smith 624a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 625fbd6ef76SBarry Smith if (nt != a->n) { 626f40265daSLois Curfman McInnes SETERRQ(1,0,"Incompatible partition of A and xx"); 627fbd6ef76SBarry Smith } 62843a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 62938597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 63043a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 63138597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 6321eb62cbbSBarry Smith return 0; 6331eb62cbbSBarry Smith } 6341eb62cbbSBarry Smith 6355615d1e5SSatish Balay #undef __FUNC__ 6365615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 6378f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 638da3a660dSBarry Smith { 639416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 640da3a660dSBarry Smith int ierr; 64143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 64238597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 64343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 64438597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 645da3a660dSBarry Smith return 0; 646da3a660dSBarry Smith } 647da3a660dSBarry Smith 6485615d1e5SSatish Balay #undef __FUNC__ 6495615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 6508f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 651da3a660dSBarry Smith { 652416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 653da3a660dSBarry Smith int ierr; 654da3a660dSBarry Smith 655da3a660dSBarry Smith /* do nondiagonal part */ 65638597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 657da3a660dSBarry Smith /* send it on its way */ 658537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 659da3a660dSBarry Smith /* do local part */ 66038597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 661da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 662da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 663da3a660dSBarry Smith /* but is not perhaps always true. */ 664537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 665da3a660dSBarry Smith return 0; 666da3a660dSBarry Smith } 667da3a660dSBarry Smith 6685615d1e5SSatish Balay #undef __FUNC__ 6695615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 6708f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 671da3a660dSBarry Smith { 672416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 673da3a660dSBarry Smith int ierr; 674da3a660dSBarry Smith 675da3a660dSBarry Smith /* do nondiagonal part */ 67638597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 677da3a660dSBarry Smith /* send it on its way */ 678537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 679da3a660dSBarry Smith /* do local part */ 68038597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 681da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 682da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 683da3a660dSBarry Smith /* but is not perhaps always true. */ 6840a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 685da3a660dSBarry Smith return 0; 686da3a660dSBarry Smith } 687da3a660dSBarry Smith 6881eb62cbbSBarry Smith /* 6891eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 6901eb62cbbSBarry Smith diagonal block 6911eb62cbbSBarry Smith */ 6925615d1e5SSatish Balay #undef __FUNC__ 6935615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 6948f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 6951eb62cbbSBarry Smith { 696416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 69744cd7ae7SLois Curfman McInnes if (a->M != a->N) 698e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 6995baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 700e3372554SBarry Smith SETERRQ(1,0,"row partition must equal col partition"); } 701416022c9SBarry Smith return MatGetDiagonal(a->A,v); 7021eb62cbbSBarry Smith } 7031eb62cbbSBarry Smith 7045615d1e5SSatish Balay #undef __FUNC__ 7055615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 7068f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A) 707052efed2SBarry Smith { 708052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 709052efed2SBarry Smith int ierr; 710052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 711052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 712052efed2SBarry Smith return 0; 713052efed2SBarry Smith } 714052efed2SBarry Smith 7155615d1e5SSatish Balay #undef __FUNC__ 7165eea60f9SBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 7178f6be9afSLois Curfman McInnes int MatDestroy_MPIAIJ(PetscObject obj) 7181eb62cbbSBarry Smith { 7191eb62cbbSBarry Smith Mat mat = (Mat) obj; 72044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 7211eb62cbbSBarry Smith int ierr; 722a5a9c739SBarry Smith #if defined(PETSC_LOG) 7236e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 724a5a9c739SBarry Smith #endif 7250452661fSBarry Smith PetscFree(aij->rowners); 72678b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 72778b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 7280452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 7290452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 7301eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 731a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 7327a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 7330452661fSBarry Smith PetscFree(aij); 73490f02eecSBarry Smith if (mat->mapping) { 73590f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 73690f02eecSBarry Smith } 737a5a9c739SBarry Smith PLogObjectDestroy(mat); 7380452661fSBarry Smith PetscHeaderDestroy(mat); 7391eb62cbbSBarry Smith return 0; 7401eb62cbbSBarry Smith } 741ee50ffe9SBarry Smith 7425615d1e5SSatish Balay #undef __FUNC__ 7435eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 7448f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 7451eb62cbbSBarry Smith { 746416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 747416022c9SBarry Smith int ierr; 748416022c9SBarry Smith 74917699dbbSLois Curfman McInnes if (aij->size == 1) { 750416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 751416022c9SBarry Smith } 752e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 753416022c9SBarry Smith return 0; 754416022c9SBarry Smith } 755416022c9SBarry Smith 7565615d1e5SSatish Balay #undef __FUNC__ 7575eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 7588f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 759416022c9SBarry Smith { 76044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 761dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 762a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 763d636dbe3SBarry Smith FILE *fd; 76419bcc07fSBarry Smith ViewerType vtype; 765416022c9SBarry Smith 76619bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 76719bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 76890ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 7690a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7704e220ebcSLois Curfman McInnes MatInfo info; 7714e220ebcSLois Curfman McInnes int flg; 772a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 77390ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7744e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 77595e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 77677c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 77795e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 7784e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 77995e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 7804e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 7814e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 7824e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 7834e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 7844e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 785a56f8943SBarry Smith fflush(fd); 78677c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 787a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 788416022c9SBarry Smith return 0; 789416022c9SBarry Smith } 7900a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 79108480c60SBarry Smith return 0; 79208480c60SBarry Smith } 793416022c9SBarry Smith } 794416022c9SBarry Smith 79519bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 79619bcc07fSBarry Smith Draw draw; 79719bcc07fSBarry Smith PetscTruth isnull; 79819bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 79919bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 80019bcc07fSBarry Smith } 80119bcc07fSBarry Smith 80219bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 80390ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 80477c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 805d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 80617699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 8071eb62cbbSBarry Smith aij->cend); 80878b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 80978b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 810d13ab20cSBarry Smith fflush(fd); 81177c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 812d13ab20cSBarry Smith } 813416022c9SBarry Smith else { 814a56f8943SBarry Smith int size = aij->size; 815a56f8943SBarry Smith rank = aij->rank; 81617699dbbSLois Curfman McInnes if (size == 1) { 81778b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 81895373324SBarry Smith } 81995373324SBarry Smith else { 82095373324SBarry Smith /* assemble the entire matrix onto first processor. */ 82195373324SBarry Smith Mat A; 822ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 8232eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 82495373324SBarry Smith Scalar *a; 8252ee70a88SLois Curfman McInnes 82617699dbbSLois Curfman McInnes if (!rank) { 827b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 828c451ab25SLois Curfman McInnes CHKERRQ(ierr); 82995373324SBarry Smith } 83095373324SBarry Smith else { 831b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 832c451ab25SLois Curfman McInnes CHKERRQ(ierr); 83395373324SBarry Smith } 834464493b3SBarry Smith PLogObjectParent(mat,A); 835416022c9SBarry Smith 83695373324SBarry Smith /* copy over the A part */ 837ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 8382ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 83995373324SBarry Smith row = aij->rstart; 840dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 84195373324SBarry Smith for ( i=0; i<m; i++ ) { 842416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 84395373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 84495373324SBarry Smith } 8452ee70a88SLois Curfman McInnes aj = Aloc->j; 846dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 84795373324SBarry Smith 84895373324SBarry Smith /* copy over the B part */ 849ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 8502ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 85195373324SBarry Smith row = aij->rstart; 8520452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 853dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 85495373324SBarry Smith for ( i=0; i<m; i++ ) { 855416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 85695373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 85795373324SBarry Smith } 8580452661fSBarry Smith PetscFree(ct); 8596d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8606d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 86117699dbbSLois Curfman McInnes if (!rank) { 86278b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 86395373324SBarry Smith } 86478b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 86595373324SBarry Smith } 86695373324SBarry Smith } 8671eb62cbbSBarry Smith return 0; 8681eb62cbbSBarry Smith } 8691eb62cbbSBarry Smith 8705615d1e5SSatish Balay #undef __FUNC__ 8715eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */ 8728f6be9afSLois Curfman McInnes int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 873416022c9SBarry Smith { 874416022c9SBarry Smith Mat mat = (Mat) obj; 875416022c9SBarry Smith int ierr; 87619bcc07fSBarry Smith ViewerType vtype; 877416022c9SBarry Smith 87819bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 87919bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 88019bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 881d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 882416022c9SBarry Smith } 88319bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 884416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 885416022c9SBarry Smith } 886416022c9SBarry Smith return 0; 887416022c9SBarry Smith } 888416022c9SBarry Smith 8891eb62cbbSBarry Smith /* 8901eb62cbbSBarry Smith This has to provide several versions. 8911eb62cbbSBarry Smith 8921eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 8931eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 894d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 8951eb62cbbSBarry Smith */ 8965615d1e5SSatish Balay #undef __FUNC__ 8975615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 8988f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 899dbb450caSBarry Smith double fshift,int its,Vec xx) 9008a729477SBarry Smith { 90144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 902d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 903ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 904c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 9056abc6512SBarry Smith int ierr,*idx, *diag; 906416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 9078a729477SBarry Smith 908d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 909dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 910dbb450caSBarry Smith ls = ls + shift; 911ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 912d6dfbf8fSBarry Smith diag = A->diag; 913c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 914da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 91538597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 916da3a660dSBarry Smith } 91743a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 91878b31e54SBarry Smith CHKERRQ(ierr); 91943a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 92078b31e54SBarry Smith CHKERRQ(ierr); 921d6dfbf8fSBarry Smith while (its--) { 922d6dfbf8fSBarry Smith /* go down through the rows */ 923d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 924d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 925dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 926dbb450caSBarry Smith v = A->a + A->i[i] + shift; 927d6dfbf8fSBarry Smith sum = b[i]; 928d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 929dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 930d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 931dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 932dbb450caSBarry Smith v = B->a + B->i[i] + shift; 933d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 93455a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 935d6dfbf8fSBarry Smith } 936d6dfbf8fSBarry Smith /* come up through the rows */ 937d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 938d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 939dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 940dbb450caSBarry Smith v = A->a + A->i[i] + shift; 941d6dfbf8fSBarry Smith sum = b[i]; 942d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 943dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 944d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 945dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 946dbb450caSBarry Smith v = B->a + B->i[i] + shift; 947d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 94855a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 949d6dfbf8fSBarry Smith } 950d6dfbf8fSBarry Smith } 951d6dfbf8fSBarry Smith } 952d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 953da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 95438597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 955da3a660dSBarry Smith } 95643a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 95778b31e54SBarry Smith CHKERRQ(ierr); 95843a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 95978b31e54SBarry Smith CHKERRQ(ierr); 960d6dfbf8fSBarry Smith while (its--) { 961d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 962d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 963dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 964dbb450caSBarry Smith v = A->a + A->i[i] + shift; 965d6dfbf8fSBarry Smith sum = b[i]; 966d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 967dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 968d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 969dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 970dbb450caSBarry Smith v = B->a + B->i[i] + shift; 971d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 97255a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 973d6dfbf8fSBarry Smith } 974d6dfbf8fSBarry Smith } 975d6dfbf8fSBarry Smith } 976d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 977da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 97838597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 979da3a660dSBarry Smith } 98043a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 98178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 98243a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 98378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 984d6dfbf8fSBarry Smith while (its--) { 985d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 986d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 987dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 988dbb450caSBarry Smith v = A->a + A->i[i] + shift; 989d6dfbf8fSBarry Smith sum = b[i]; 990d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 991dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 992d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 993dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 994dbb450caSBarry Smith v = B->a + B->i[i] + shift; 995d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 99655a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 997d6dfbf8fSBarry Smith } 998d6dfbf8fSBarry Smith } 999d6dfbf8fSBarry Smith } 1000c16cb8f2SBarry Smith else { 1001e3372554SBarry Smith SETERRQ(1,0,"Option not supported"); 1002c16cb8f2SBarry Smith } 10038a729477SBarry Smith return 0; 10048a729477SBarry Smith } 1005a66be287SLois Curfman McInnes 10065615d1e5SSatish Balay #undef __FUNC__ 10075eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */ 10088f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1009a66be287SLois Curfman McInnes { 1010a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1011a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 10124e220ebcSLois Curfman McInnes int ierr; 10134e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 1014a66be287SLois Curfman McInnes 10154e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 10164e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 10174e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 10184e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 10194e220ebcSLois Curfman McInnes info->block_size = 1.0; 10204e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 10214e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 10224e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 10234e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 10244e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 10254e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 1026a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 10274e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 10284e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 10294e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 10304e220ebcSLois Curfman McInnes info->memory = isend[3]; 10314e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1032a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 10334e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 10344e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10354e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10364e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10374e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10384e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1039a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 10404e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 10414e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10424e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10434e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10444e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10454e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1046a66be287SLois Curfman McInnes } 10474e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 10484e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 10494e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 10504e220ebcSLois Curfman McInnes 1051a66be287SLois Curfman McInnes return 0; 1052a66be287SLois Curfman McInnes } 1053a66be287SLois Curfman McInnes 1054299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1055299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1056299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1057299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1058299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1059299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1060299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1061299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1062299609e3SLois Curfman McInnes 10635615d1e5SSatish Balay #undef __FUNC__ 10645eea60f9SBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */ 10658f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op) 1066c74985f6SBarry Smith { 1067c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1068c74985f6SBarry Smith 10696d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10706d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10716da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1072c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 1073*96854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1074c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1075b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1076b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1077b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1078aeafbbfcSLois Curfman McInnes a->roworiented = 1; 1079c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1080c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1081b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 10826da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 10836d4a8577SBarry Smith op == MAT_SYMMETRIC || 10846d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 10856d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 108694a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10876d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 10884b0e389bSBarry Smith a->roworiented = 0; 10894b0e389bSBarry Smith MatSetOption(a->A,op); 10904b0e389bSBarry Smith MatSetOption(a->B,op); 10912b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 109290f02eecSBarry Smith a->donotstash = 1; 109390f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1094e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1095c0bbcb79SLois Curfman McInnes else 1096e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1097c74985f6SBarry Smith return 0; 1098c74985f6SBarry Smith } 1099c74985f6SBarry Smith 11005615d1e5SSatish Balay #undef __FUNC__ 11015eea60f9SBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */ 11028f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1103c74985f6SBarry Smith { 110444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1105c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1106c74985f6SBarry Smith return 0; 1107c74985f6SBarry Smith } 1108c74985f6SBarry Smith 11095615d1e5SSatish Balay #undef __FUNC__ 11105eea60f9SBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */ 11118f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1112c74985f6SBarry Smith { 111344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1114b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1115c74985f6SBarry Smith return 0; 1116c74985f6SBarry Smith } 1117c74985f6SBarry Smith 11185615d1e5SSatish Balay #undef __FUNC__ 11195eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */ 11208f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1121c74985f6SBarry Smith { 112244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1123c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1124c74985f6SBarry Smith return 0; 1125c74985f6SBarry Smith } 1126c74985f6SBarry Smith 11276d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11286d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11296d84be18SBarry Smith 11305615d1e5SSatish Balay #undef __FUNC__ 11315615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 11326d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 113339e00950SLois Curfman McInnes { 1134154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 113570f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1136154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1137154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 113870f0671dSBarry Smith int *cmap, *idx_p; 113939e00950SLois Curfman McInnes 1140e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 11417a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 11427a0afa10SBarry Smith 114370f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 11447a0afa10SBarry Smith /* 11457a0afa10SBarry Smith allocate enough space to hold information from the longest row. 11467a0afa10SBarry Smith */ 11477a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1148c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1149c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 11507a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 11517a0afa10SBarry Smith if (max < tmp) { max = tmp; } 11527a0afa10SBarry Smith } 11537a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 11547a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 11557a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 11567a0afa10SBarry Smith } 11577a0afa10SBarry Smith 1158e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1159abc0e9e4SLois Curfman McInnes lrow = row - rstart; 116039e00950SLois Curfman McInnes 1161154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1162154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1163154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 116438597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 116538597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1166154123eaSLois Curfman McInnes nztot = nzA + nzB; 1167154123eaSLois Curfman McInnes 116870f0671dSBarry Smith cmap = mat->garray; 1169154123eaSLois Curfman McInnes if (v || idx) { 1170154123eaSLois Curfman McInnes if (nztot) { 1171154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 117270f0671dSBarry Smith int imark = -1; 1173154123eaSLois Curfman McInnes if (v) { 117470f0671dSBarry Smith *v = v_p = mat->rowvalues; 117539e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 117670f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1177154123eaSLois Curfman McInnes else break; 1178154123eaSLois Curfman McInnes } 1179154123eaSLois Curfman McInnes imark = i; 118070f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 118170f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1182154123eaSLois Curfman McInnes } 1183154123eaSLois Curfman McInnes if (idx) { 118470f0671dSBarry Smith *idx = idx_p = mat->rowindices; 118570f0671dSBarry Smith if (imark > -1) { 118670f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 118770f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 118870f0671dSBarry Smith } 118970f0671dSBarry Smith } else { 1190154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 119170f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1192154123eaSLois Curfman McInnes else break; 1193154123eaSLois Curfman McInnes } 1194154123eaSLois Curfman McInnes imark = i; 119570f0671dSBarry Smith } 119670f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 119770f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 119839e00950SLois Curfman McInnes } 119939e00950SLois Curfman McInnes } 12001ca473b0SSatish Balay else { 12011ca473b0SSatish Balay if (idx) *idx = 0; 12021ca473b0SSatish Balay if (v) *v = 0; 12031ca473b0SSatish Balay } 1204154123eaSLois Curfman McInnes } 120539e00950SLois Curfman McInnes *nz = nztot; 120638597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 120738597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 120839e00950SLois Curfman McInnes return 0; 120939e00950SLois Curfman McInnes } 121039e00950SLois Curfman McInnes 12115615d1e5SSatish Balay #undef __FUNC__ 12125eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */ 12136d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 121439e00950SLois Curfman McInnes { 12157a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 12167a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1217e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 12187a0afa10SBarry Smith } 12197a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 122039e00950SLois Curfman McInnes return 0; 122139e00950SLois Curfman McInnes } 122239e00950SLois Curfman McInnes 12235615d1e5SSatish Balay #undef __FUNC__ 12245615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 12258f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1226855ac2c5SLois Curfman McInnes { 1227855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1228ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1229416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1230416022c9SBarry Smith double sum = 0.0; 123104ca555eSLois Curfman McInnes Scalar *v; 123204ca555eSLois Curfman McInnes 123317699dbbSLois Curfman McInnes if (aij->size == 1) { 123414183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 123537fa93a5SLois Curfman McInnes } else { 123604ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 123704ca555eSLois Curfman McInnes v = amat->a; 123804ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 123904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 124004ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 124104ca555eSLois Curfman McInnes #else 124204ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 124304ca555eSLois Curfman McInnes #endif 124404ca555eSLois Curfman McInnes } 124504ca555eSLois Curfman McInnes v = bmat->a; 124604ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 124704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 124804ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 124904ca555eSLois Curfman McInnes #else 125004ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 125104ca555eSLois Curfman McInnes #endif 125204ca555eSLois Curfman McInnes } 12536d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 125404ca555eSLois Curfman McInnes *norm = sqrt(*norm); 125504ca555eSLois Curfman McInnes } 125604ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 125704ca555eSLois Curfman McInnes double *tmp, *tmp2; 125804ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 1259758f045eSSatish Balay tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1260758f045eSSatish Balay tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1261cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 126204ca555eSLois Curfman McInnes *norm = 0.0; 126304ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 126404ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1265579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 126604ca555eSLois Curfman McInnes } 126704ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 126804ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1269579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 127004ca555eSLois Curfman McInnes } 12716d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 127204ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 127304ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 127404ca555eSLois Curfman McInnes } 12750452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 127604ca555eSLois Curfman McInnes } 127704ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1278515d9167SLois Curfman McInnes double ntemp = 0.0; 127904ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1280dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 128104ca555eSLois Curfman McInnes sum = 0.0; 128204ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1283cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 128404ca555eSLois Curfman McInnes } 1285dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 128604ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1287cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 128804ca555eSLois Curfman McInnes } 1289515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 129004ca555eSLois Curfman McInnes } 12916d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 129204ca555eSLois Curfman McInnes } 129304ca555eSLois Curfman McInnes else { 1294e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 129504ca555eSLois Curfman McInnes } 129637fa93a5SLois Curfman McInnes } 1297855ac2c5SLois Curfman McInnes return 0; 1298855ac2c5SLois Curfman McInnes } 1299855ac2c5SLois Curfman McInnes 13005615d1e5SSatish Balay #undef __FUNC__ 13015615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 13028f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1303b7c46309SBarry Smith { 1304b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1305dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1306416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1307416022c9SBarry Smith Mat B; 1308b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1309b7c46309SBarry Smith Scalar *array; 1310b7c46309SBarry Smith 13113638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 1312e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 1313b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1314b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1315b7c46309SBarry Smith 1316b7c46309SBarry Smith /* copy over the A part */ 1317ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1318b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1319b7c46309SBarry Smith row = a->rstart; 1320dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1321b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1322416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1323b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1324b7c46309SBarry Smith } 1325b7c46309SBarry Smith aj = Aloc->j; 13264af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1327b7c46309SBarry Smith 1328b7c46309SBarry Smith /* copy over the B part */ 1329ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1330b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1331b7c46309SBarry Smith row = a->rstart; 13320452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1333dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1334b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1335416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1336b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1337b7c46309SBarry Smith } 13380452661fSBarry Smith PetscFree(ct); 13396d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13406d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13413638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 13420de55854SLois Curfman McInnes *matout = B; 13430de55854SLois Curfman McInnes } else { 13440de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 13450452661fSBarry Smith PetscFree(a->rowners); 13460de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 13470de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 13480452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 13490452661fSBarry Smith if (a->garray) PetscFree(a->garray); 13500de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1351a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 13520452661fSBarry Smith PetscFree(a); 1353416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 13540452661fSBarry Smith PetscHeaderDestroy(B); 13550de55854SLois Curfman McInnes } 1356b7c46309SBarry Smith return 0; 1357b7c46309SBarry Smith } 1358b7c46309SBarry Smith 13595615d1e5SSatish Balay #undef __FUNC__ 13605615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 13614b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1362a008b906SSatish Balay { 13634b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13644b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1365a008b906SSatish Balay int ierr,s1,s2,s3; 1366a008b906SSatish Balay 13674b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 13684b967eb1SSatish Balay if (rr) { 13694b967eb1SSatish Balay s3 = aij->n; 13704b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 1371e3372554SBarry Smith if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 13724b967eb1SSatish Balay /* Overlap communication with computation. */ 137343a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1374a008b906SSatish Balay } 13754b967eb1SSatish Balay if (ll) { 13764b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 1377e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1378c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 13794b967eb1SSatish Balay } 13804b967eb1SSatish Balay /* scale the diagonal block */ 1381c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 13824b967eb1SSatish Balay 13834b967eb1SSatish Balay if (rr) { 13844b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 138543a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1386c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 13874b967eb1SSatish Balay } 13884b967eb1SSatish Balay 1389a008b906SSatish Balay return 0; 1390a008b906SSatish Balay } 1391a008b906SSatish Balay 1392a008b906SSatish Balay 1393682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 13945615d1e5SSatish Balay #undef __FUNC__ 13955eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */ 13968f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A) 1397682d7d0cSBarry Smith { 1398682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1399682d7d0cSBarry Smith 1400682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1401682d7d0cSBarry Smith else return 0; 1402682d7d0cSBarry Smith } 1403682d7d0cSBarry Smith 14045615d1e5SSatish Balay #undef __FUNC__ 14055eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */ 14068f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 14075a838052SSatish Balay { 14085a838052SSatish Balay *bs = 1; 14095a838052SSatish Balay return 0; 14105a838052SSatish Balay } 14115615d1e5SSatish Balay #undef __FUNC__ 14125eea60f9SBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */ 14138f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A) 1414bb5a7306SBarry Smith { 1415bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1416bb5a7306SBarry Smith int ierr; 1417bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1418bb5a7306SBarry Smith return 0; 1419bb5a7306SBarry Smith } 1420bb5a7306SBarry Smith 14218f6be9afSLois Curfman McInnes extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 14222f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 14230a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 14240a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 14258a729477SBarry Smith /* -------------------------------------------------------------------*/ 14262ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 142739e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 142844a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 142944a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 143036ce4990SBarry Smith 0,0, 143136ce4990SBarry Smith 0,0, 143236ce4990SBarry Smith 0,0, 143344a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1434b7c46309SBarry Smith MatTranspose_MPIAIJ, 1435a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1436a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1437ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 14381eb62cbbSBarry Smith 0, 1439299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 144036ce4990SBarry Smith 0,0,0,0, 1441d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 144236ce4990SBarry Smith 0,0, 144394a9d846SBarry Smith 0,0,MatConvertSameType_MPIAIJ,0,0, 1444b49de8d1SLois Curfman McInnes 0,0,0, 1445598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1446052efed2SBarry Smith MatPrintHelp_MPIAIJ, 14473b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 14480a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 1449bb5a7306SBarry Smith MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 145036ce4990SBarry Smith 14518a729477SBarry Smith 14525615d1e5SSatish Balay #undef __FUNC__ 14535615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 14541987afe7SBarry Smith /*@C 1455ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 14563a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 14573a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 14583a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 14593a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 14608a729477SBarry Smith 14618a729477SBarry Smith Input Parameters: 14621eb62cbbSBarry Smith . comm - MPI communicator 14637d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 146492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 146592e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 14661a3896d6SBarry Smith . n - This value should be the same as the local size used in creating the 14671a3896d6SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 14681a3896d6SBarry Smith calculated if N is given) For square matrices n is almost always m. 14697d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 147092e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1471ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1472ff756334SLois Curfman McInnes (same for all local rows) 14732bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 147492e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 14752bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 14762bd5e0b2SLois Curfman McInnes it is zero. 14772bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1478ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 14792bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 14802bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 14812bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 14828a729477SBarry Smith 1483ff756334SLois Curfman McInnes Output Parameter: 148444cd7ae7SLois Curfman McInnes . A - the matrix 14858a729477SBarry Smith 1486ff756334SLois Curfman McInnes Notes: 1487ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1488ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 14890002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 14900002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1491ff756334SLois Curfman McInnes 1492ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1493ff756334SLois Curfman McInnes (possibly both). 1494ff756334SLois Curfman McInnes 14955511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 14965511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 14975511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 14985511cfe3SLois Curfman McInnes 14995511cfe3SLois Curfman McInnes Options Database Keys: 15005511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 15016e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 15026e62573dSLois Curfman McInnes $ (max limit=5) 15036e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 15046e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 15056e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 15065511cfe3SLois Curfman McInnes 1507e0245417SLois Curfman McInnes Storage Information: 1508e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1509e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1510e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1511e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1512e0245417SLois Curfman McInnes 1513e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 15145ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 15155ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 15165ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 15175ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1518ff756334SLois Curfman McInnes 15195511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 15205511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 15212191d07cSBarry Smith 1522b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1523b810aeb4SBarry Smith $ ------------------- 15245511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 15255511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 15265511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 15275511cfe3SLois Curfman McInnes $ ------------------- 1528b810aeb4SBarry Smith $ 1529b810aeb4SBarry Smith 15305511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 15315511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 15325511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 15335511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 15345511cfe3SLois Curfman McInnes 15355511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 15365511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 15375511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 15383d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 153992e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15406da5968aSLois Curfman McInnes matrices. 15413a511b96SLois Curfman McInnes 1542dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1543ff756334SLois Curfman McInnes 1544fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 15458a729477SBarry Smith @*/ 15461eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 154744cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 15488a729477SBarry Smith { 154944cd7ae7SLois Curfman McInnes Mat B; 155044cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 155136ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1552416022c9SBarry Smith 155344cd7ae7SLois Curfman McInnes *A = 0; 155444cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 155544cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 155644cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 155744cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 155844cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 155936ce4990SBarry Smith MPI_Comm_size(comm,&size); 156036ce4990SBarry Smith if (size == 1) { 156136ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 156236ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 156336ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 156436ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 156536ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 156636ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 156736ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 156836ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 156936ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 157036ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 157136ce4990SBarry Smith } 157244cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 157344cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 157444cd7ae7SLois Curfman McInnes B->factor = 0; 157544cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 157690f02eecSBarry Smith B->mapping = 0; 1577d6dfbf8fSBarry Smith 157847794344SBarry Smith B->insertmode = NOT_SET_VALUES; 157944cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 158044cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 15811eb62cbbSBarry Smith 1582b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1583e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 15841987afe7SBarry Smith 1585dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 15861eb62cbbSBarry Smith work[0] = m; work[1] = n; 1587d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1588dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1589dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 15901eb62cbbSBarry Smith } 159144cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 159244cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 159344cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 159444cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 159544cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 159644cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 15971eb62cbbSBarry Smith 15981eb62cbbSBarry Smith /* build local table of row and column ownerships */ 159944cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 160044cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1601603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 160244cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 160344cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 160444cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 160544cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 16068a729477SBarry Smith } 160744cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 160844cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 160944cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 161044cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 161144cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 161244cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 16138a729477SBarry Smith } 161444cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 161544cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 16168a729477SBarry Smith 16175ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1618029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 161944cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 16207b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1621029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 162244cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 16238a729477SBarry Smith 16241eb62cbbSBarry Smith /* build cache for off array entries formed */ 162544cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 162690f02eecSBarry Smith b->donotstash = 0; 162744cd7ae7SLois Curfman McInnes b->colmap = 0; 162844cd7ae7SLois Curfman McInnes b->garray = 0; 162944cd7ae7SLois Curfman McInnes b->roworiented = 1; 16308a729477SBarry Smith 16311eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 163244cd7ae7SLois Curfman McInnes b->lvec = 0; 163344cd7ae7SLois Curfman McInnes b->Mvctx = 0; 16348a729477SBarry Smith 16357a0afa10SBarry Smith /* stuff for MatGetRow() */ 163644cd7ae7SLois Curfman McInnes b->rowindices = 0; 163744cd7ae7SLois Curfman McInnes b->rowvalues = 0; 163844cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 16397a0afa10SBarry Smith 164044cd7ae7SLois Curfman McInnes *A = B; 1641d6dfbf8fSBarry Smith return 0; 1642d6dfbf8fSBarry Smith } 1643c74985f6SBarry Smith 16445615d1e5SSatish Balay #undef __FUNC__ 16455615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ" 16468f6be9afSLois Curfman McInnes int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1647d6dfbf8fSBarry Smith { 1648d6dfbf8fSBarry Smith Mat mat; 1649416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1650a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1651d6dfbf8fSBarry Smith 1652416022c9SBarry Smith *newmat = 0; 16530452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1654a5a9c739SBarry Smith PLogObjectCreate(mat); 16550452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1656416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 165744a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 165844a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1659d6dfbf8fSBarry Smith mat->factor = matin->factor; 1660c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1661d6dfbf8fSBarry Smith 166244cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 166344cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 166444cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 166544cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1666d6dfbf8fSBarry Smith 1667416022c9SBarry Smith a->rstart = oldmat->rstart; 1668416022c9SBarry Smith a->rend = oldmat->rend; 1669416022c9SBarry Smith a->cstart = oldmat->cstart; 1670416022c9SBarry Smith a->cend = oldmat->cend; 167117699dbbSLois Curfman McInnes a->size = oldmat->size; 167217699dbbSLois Curfman McInnes a->rank = oldmat->rank; 167347794344SBarry Smith mat->insertmode = NOT_SET_VALUES; 1674bcd2baecSBarry Smith a->rowvalues = 0; 1675bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1676d6dfbf8fSBarry Smith 1677603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1678603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1679603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1680603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1681416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 16822ee70a88SLois Curfman McInnes if (oldmat->colmap) { 16830452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1684416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1685416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1686416022c9SBarry Smith } else a->colmap = 0; 16873f41c07dSBarry Smith if (oldmat->garray) { 16883f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 16893f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1690464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 16913f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1692416022c9SBarry Smith } else a->garray = 0; 1693d6dfbf8fSBarry Smith 1694416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1695416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1696a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1697416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1698416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1699416022c9SBarry Smith PLogObjectParent(mat,a->A); 1700416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1701416022c9SBarry Smith PLogObjectParent(mat,a->B); 17025dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 170325cdf11fSBarry Smith if (flg) { 1704682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1705682d7d0cSBarry Smith } 17068a729477SBarry Smith *newmat = mat; 17078a729477SBarry Smith return 0; 17088a729477SBarry Smith } 1709416022c9SBarry Smith 171077c4ece6SBarry Smith #include "sys.h" 1711416022c9SBarry Smith 17125615d1e5SSatish Balay #undef __FUNC__ 17135615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 171419bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1715416022c9SBarry Smith { 1716d65a2f8fSBarry Smith Mat A; 1717416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1718d65a2f8fSBarry Smith Scalar *vals,*svals; 171919bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1720416022c9SBarry Smith MPI_Status status; 172117699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1722d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 172319bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1724416022c9SBarry Smith 172517699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 172617699dbbSLois Curfman McInnes if (!rank) { 172790ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 172877c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1729e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1730416022c9SBarry Smith } 1731416022c9SBarry Smith 1732416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1733416022c9SBarry Smith M = header[1]; N = header[2]; 1734416022c9SBarry Smith /* determine ownership of all rows */ 173517699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 17360452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1737416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1738416022c9SBarry Smith rowners[0] = 0; 173917699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1740416022c9SBarry Smith rowners[i] += rowners[i-1]; 1741416022c9SBarry Smith } 174217699dbbSLois Curfman McInnes rstart = rowners[rank]; 174317699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1744416022c9SBarry Smith 1745416022c9SBarry Smith /* distribute row lengths to all processors */ 17460452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1747416022c9SBarry Smith offlens = ourlens + (rend-rstart); 174817699dbbSLois Curfman McInnes if (!rank) { 17490452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 175077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 17510452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 175217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1753d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 17540452661fSBarry Smith PetscFree(sndcounts); 1755416022c9SBarry Smith } 1756416022c9SBarry Smith else { 1757416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1758416022c9SBarry Smith } 1759416022c9SBarry Smith 176017699dbbSLois Curfman McInnes if (!rank) { 1761416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 17620452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1763cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 176417699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1765416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1766416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1767416022c9SBarry Smith } 1768416022c9SBarry Smith } 17690452661fSBarry Smith PetscFree(rowlengths); 1770416022c9SBarry Smith 1771416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1772416022c9SBarry Smith maxnz = 0; 177317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 17740452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1775416022c9SBarry Smith } 17760452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1777416022c9SBarry Smith 1778416022c9SBarry Smith /* read in my part of the matrix column indices */ 1779416022c9SBarry Smith nz = procsnz[0]; 17800452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 178177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1782d65a2f8fSBarry Smith 1783d65a2f8fSBarry Smith /* read in every one elses and ship off */ 178417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1785d65a2f8fSBarry Smith nz = procsnz[i]; 178677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1787d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1788d65a2f8fSBarry Smith } 17890452661fSBarry Smith PetscFree(cols); 1790416022c9SBarry Smith } 1791416022c9SBarry Smith else { 1792416022c9SBarry Smith /* determine buffer space needed for message */ 1793416022c9SBarry Smith nz = 0; 1794416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1795416022c9SBarry Smith nz += ourlens[i]; 1796416022c9SBarry Smith } 17970452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1798416022c9SBarry Smith 1799416022c9SBarry Smith /* receive message of column indices*/ 1800d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1801416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1802e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1803416022c9SBarry Smith } 1804416022c9SBarry Smith 1805416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1806cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1807416022c9SBarry Smith jj = 0; 1808416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1809416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1810d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1811416022c9SBarry Smith jj++; 1812416022c9SBarry Smith } 1813416022c9SBarry Smith } 1814d65a2f8fSBarry Smith 1815d65a2f8fSBarry Smith /* create our matrix */ 1816416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1817416022c9SBarry Smith ourlens[i] -= offlens[i]; 1818416022c9SBarry Smith } 1819d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1820d65a2f8fSBarry Smith A = *newmat; 18216d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1822d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1823d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1824d65a2f8fSBarry Smith } 1825416022c9SBarry Smith 182617699dbbSLois Curfman McInnes if (!rank) { 18270452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1828416022c9SBarry Smith 1829416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1830416022c9SBarry Smith nz = procsnz[0]; 183177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1832d65a2f8fSBarry Smith 1833d65a2f8fSBarry Smith /* insert into matrix */ 1834d65a2f8fSBarry Smith jj = rstart; 1835d65a2f8fSBarry Smith smycols = mycols; 1836d65a2f8fSBarry Smith svals = vals; 1837d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1838d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1839d65a2f8fSBarry Smith smycols += ourlens[i]; 1840d65a2f8fSBarry Smith svals += ourlens[i]; 1841d65a2f8fSBarry Smith jj++; 1842416022c9SBarry Smith } 1843416022c9SBarry Smith 1844d65a2f8fSBarry Smith /* read in other processors and ship out */ 184517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1846416022c9SBarry Smith nz = procsnz[i]; 184777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1848d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1849416022c9SBarry Smith } 18500452661fSBarry Smith PetscFree(procsnz); 1851416022c9SBarry Smith } 1852d65a2f8fSBarry Smith else { 1853d65a2f8fSBarry Smith /* receive numeric values */ 18540452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1855416022c9SBarry Smith 1856d65a2f8fSBarry Smith /* receive message of values*/ 1857d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1858d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1859e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1860d65a2f8fSBarry Smith 1861d65a2f8fSBarry Smith /* insert into matrix */ 1862d65a2f8fSBarry Smith jj = rstart; 1863d65a2f8fSBarry Smith smycols = mycols; 1864d65a2f8fSBarry Smith svals = vals; 1865d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1866d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1867d65a2f8fSBarry Smith smycols += ourlens[i]; 1868d65a2f8fSBarry Smith svals += ourlens[i]; 1869d65a2f8fSBarry Smith jj++; 1870d65a2f8fSBarry Smith } 1871d65a2f8fSBarry Smith } 18720452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1873d65a2f8fSBarry Smith 18746d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 18756d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1876416022c9SBarry Smith return 0; 1877416022c9SBarry Smith } 1878