1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*ba4e3ef2SSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.209 1997/07/09 20:54:04 balay Exp balay $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 8d9942c19SSatish Balay #include "src/inline/spops.h" 98a729477SBarry Smith 109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 129e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 139e25ed09SBarry Smith length of colmap equals the global matrix length. 149e25ed09SBarry Smith */ 155615d1e5SSatish Balay #undef __FUNC__ 165eea60f9SBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */ 170a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 189e25ed09SBarry Smith { 1944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21905e6a2fSBarry Smith int n = B->n,i; 22dbb450caSBarry Smith 23758f045eSSatish Balay aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 24464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 25cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 279e25ed09SBarry Smith return 0; 289e25ed09SBarry Smith } 299e25ed09SBarry Smith 302493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 312493cbb0SBarry Smith 325615d1e5SSatish Balay #undef __FUNC__ 335615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIAIJ" 348f6be9afSLois Curfman McInnes int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 353b2fbd54SBarry Smith PetscTruth *done) 36299609e3SLois Curfman McInnes { 37299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 38299609e3SLois Curfman McInnes int ierr; 3917699dbbSLois Curfman McInnes if (aij->size == 1) { 403b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 41e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 423b2fbd54SBarry Smith return 0; 433b2fbd54SBarry Smith } 443b2fbd54SBarry Smith 455615d1e5SSatish Balay #undef __FUNC__ 465eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */ 478f6be9afSLois Curfman McInnes int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 483b2fbd54SBarry Smith PetscTruth *done) 493b2fbd54SBarry Smith { 503b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 513b2fbd54SBarry Smith int ierr; 523b2fbd54SBarry Smith if (aij->size == 1) { 533b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 55299609e3SLois Curfman McInnes return 0; 56299609e3SLois Curfman McInnes } 57299609e3SLois Curfman McInnes 580520107fSSatish Balay #define CHUNKSIZE 15 5930770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 600520107fSSatish Balay { \ 610520107fSSatish Balay \ 620520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 6330770e4dSSatish Balay rmax = aimax[row]; nrow = ailen[row]; \ 64f5e9677aSSatish Balay col1 = col - shift; \ 65f5e9677aSSatish Balay \ 66*ba4e3ef2SSatish Balay low = 0; high = nrow; \ 67*ba4e3ef2SSatish Balay while (high-low > 5) { \ 68*ba4e3ef2SSatish Balay t = (low+high)/2; \ 69*ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 70*ba4e3ef2SSatish Balay else low = t; \ 71*ba4e3ef2SSatish Balay } \ 720520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 73f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 74f5e9677aSSatish Balay if (rp[_i] == col1) { \ 750520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 760520107fSSatish Balay else ap[_i] = value; \ 7730770e4dSSatish Balay goto a_noinsert; \ 780520107fSSatish Balay } \ 790520107fSSatish Balay } \ 8089280ab3SLois Curfman McInnes if (nonew == 1) goto a_noinsert; \ 8111ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 820520107fSSatish Balay if (nrow >= rmax) { \ 830520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 840520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 850520107fSSatish Balay Scalar *new_a; \ 860520107fSSatish Balay \ 8711ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 8889280ab3SLois Curfman McInnes \ 890520107fSSatish Balay /* malloc new storage space */ \ 900520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 910520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 920520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 930520107fSSatish Balay new_i = new_j + new_nz; \ 940520107fSSatish Balay \ 950520107fSSatish Balay /* copy over old data into new slots */ \ 960520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 970520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 980520107fSSatish Balay PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 990520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 1000520107fSSatish Balay PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 1010520107fSSatish Balay len*sizeof(int)); \ 1020520107fSSatish Balay PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 1030520107fSSatish Balay PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 1040520107fSSatish Balay len*sizeof(Scalar)); \ 1050520107fSSatish Balay /* free up old matrix storage */ \ 106f5e9677aSSatish Balay \ 1070520107fSSatish Balay PetscFree(a->a); \ 1080520107fSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 1090520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1100520107fSSatish Balay a->singlemalloc = 1; \ 1110520107fSSatish Balay \ 1120520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 11330770e4dSSatish Balay rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 1140520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 1150520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 1160520107fSSatish Balay a->reallocs++; \ 1170520107fSSatish Balay } \ 1180520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 1190520107fSSatish Balay /* shift up all the later entries in this row */ \ 1200520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 1210520107fSSatish Balay rp[ii+1] = rp[ii]; \ 1220520107fSSatish Balay ap[ii+1] = ap[ii]; \ 1230520107fSSatish Balay } \ 124f5e9677aSSatish Balay rp[_i] = col1; \ 1250520107fSSatish Balay ap[_i] = value; \ 12630770e4dSSatish Balay a_noinsert: ; \ 1270520107fSSatish Balay ailen[row] = nrow; \ 1280520107fSSatish Balay } 1290a198c4cSBarry Smith 13030770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 13130770e4dSSatish Balay { \ 13230770e4dSSatish Balay \ 13330770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 13430770e4dSSatish Balay rmax = bimax[row]; nrow = bilen[row]; \ 13530770e4dSSatish Balay col1 = col - shift; \ 13630770e4dSSatish Balay \ 137*ba4e3ef2SSatish Balay low = 0; high = nrow; \ 138*ba4e3ef2SSatish Balay while (high-low > 5) { \ 139*ba4e3ef2SSatish Balay t = (low+high)/2; \ 140*ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 141*ba4e3ef2SSatish Balay else low = t; \ 142*ba4e3ef2SSatish Balay } \ 14330770e4dSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 14430770e4dSSatish Balay if (rp[_i] > col1) break; \ 14530770e4dSSatish Balay if (rp[_i] == col1) { \ 14630770e4dSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 14730770e4dSSatish Balay else ap[_i] = value; \ 14830770e4dSSatish Balay goto b_noinsert; \ 14930770e4dSSatish Balay } \ 15030770e4dSSatish Balay } \ 15189280ab3SLois Curfman McInnes if (nonew == 1) goto b_noinsert; \ 15211ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 15330770e4dSSatish Balay if (nrow >= rmax) { \ 15430770e4dSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15574c639caSSatish Balay int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 15630770e4dSSatish Balay Scalar *new_a; \ 15730770e4dSSatish Balay \ 15811ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 15989280ab3SLois Curfman McInnes \ 16030770e4dSSatish Balay /* malloc new storage space */ \ 16174c639caSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 16230770e4dSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 16330770e4dSSatish Balay new_j = (int *) (new_a + new_nz); \ 16430770e4dSSatish Balay new_i = new_j + new_nz; \ 16530770e4dSSatish Balay \ 16630770e4dSSatish Balay /* copy over old data into new slots */ \ 16730770e4dSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 16874c639caSSatish Balay for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 16930770e4dSSatish Balay PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 17030770e4dSSatish Balay len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 17130770e4dSSatish Balay PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 17230770e4dSSatish Balay len*sizeof(int)); \ 17330770e4dSSatish Balay PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 17430770e4dSSatish Balay PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 17530770e4dSSatish Balay len*sizeof(Scalar)); \ 17630770e4dSSatish Balay /* free up old matrix storage */ \ 17730770e4dSSatish Balay \ 17874c639caSSatish Balay PetscFree(b->a); \ 17974c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 18074c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 18174c639caSSatish Balay b->singlemalloc = 1; \ 18230770e4dSSatish Balay \ 18330770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 18430770e4dSSatish Balay rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 18574c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 18674c639caSSatish Balay b->maxnz += CHUNKSIZE; \ 18774c639caSSatish Balay b->reallocs++; \ 18830770e4dSSatish Balay } \ 18974c639caSSatish Balay N = nrow++ - 1; b->nz++; \ 19030770e4dSSatish Balay /* shift up all the later entries in this row */ \ 19130770e4dSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 19230770e4dSSatish Balay rp[ii+1] = rp[ii]; \ 19330770e4dSSatish Balay ap[ii+1] = ap[ii]; \ 19430770e4dSSatish Balay } \ 19530770e4dSSatish Balay rp[_i] = col1; \ 19630770e4dSSatish Balay ap[_i] = value; \ 19730770e4dSSatish Balay b_noinsert: ; \ 19830770e4dSSatish Balay bilen[row] = nrow; \ 19930770e4dSSatish Balay } 20030770e4dSSatish Balay 2010520107fSSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 2025615d1e5SSatish Balay #undef __FUNC__ 2035615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 2048f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 2058a729477SBarry Smith { 20644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2074b0e389bSBarry Smith Scalar value; 2081eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 2091eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 210905e6a2fSBarry Smith int roworiented = aij->roworiented; 2118a729477SBarry Smith 2120520107fSSatish Balay /* Some Variables required in the macro */ 2134ee7247eSSatish Balay Mat A = aij->A; 2144ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 21530770e4dSSatish Balay int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 21630770e4dSSatish Balay Scalar *aa = a->a; 21730770e4dSSatish Balay 21830770e4dSSatish Balay Mat B = aij->B; 21930770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 22030770e4dSSatish Balay int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 22130770e4dSSatish Balay Scalar *ba = b->a; 22230770e4dSSatish Balay 223*ba4e3ef2SSatish Balay int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 22430770e4dSSatish Balay int nonew = a->nonew,shift = a->indexshift; 22530770e4dSSatish Balay Scalar *ap; 2264ee7247eSSatish Balay 2278a729477SBarry Smith for ( i=0; i<m; i++ ) { 2280a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 229e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 230e3372554SBarry Smith if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 2310a198c4cSBarry Smith #endif 2324b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 2334b0e389bSBarry Smith row = im[i] - rstart; 2341eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 2354b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 2364b0e389bSBarry Smith col = in[j] - cstart; 2374b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 23830770e4dSSatish Balay MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 2390520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2401eb62cbbSBarry Smith } 2410a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 242e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 243e3372554SBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 2440a198c4cSBarry Smith #endif 2451eb62cbbSBarry Smith else { 246227d817aSBarry Smith if (mat->was_assembled) { 247905e6a2fSBarry Smith if (!aij->colmap) { 248905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 249905e6a2fSBarry Smith } 250905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 251ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2522493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2534b0e389bSBarry Smith col = in[j]; 2549bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 255f9508a3cSSatish Balay B = aij->B; 256f9508a3cSSatish Balay b = (Mat_SeqAIJ *) B->data; 257f9508a3cSSatish Balay bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 258f9508a3cSSatish Balay ba = b->a; 259d6dfbf8fSBarry Smith } 2609e25ed09SBarry Smith } 2614b0e389bSBarry Smith else col = in[j]; 2624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 26330770e4dSSatish Balay MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 26430770e4dSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2651eb62cbbSBarry Smith } 2661eb62cbbSBarry Smith } 2671eb62cbbSBarry Smith } 2681eb62cbbSBarry Smith else { 26990f02eecSBarry Smith if (roworiented && !aij->donotstash) { 2704b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 2714b0e389bSBarry Smith } 2724b0e389bSBarry Smith else { 27390f02eecSBarry Smith if (!aij->donotstash) { 2744b0e389bSBarry Smith row = im[i]; 2754b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 2764b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 2774b0e389bSBarry Smith } 2784b0e389bSBarry Smith } 2791eb62cbbSBarry Smith } 2808a729477SBarry Smith } 28190f02eecSBarry Smith } 2828a729477SBarry Smith return 0; 2838a729477SBarry Smith } 2848a729477SBarry Smith 2855615d1e5SSatish Balay #undef __FUNC__ 2865615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 2878f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 288b49de8d1SLois Curfman McInnes { 289b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 290b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 291b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 292b49de8d1SLois Curfman McInnes 293b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 294e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 295e3372554SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 296b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 297b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 298b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 299e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 300e3372554SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 301b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 302b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 303b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 304b49de8d1SLois Curfman McInnes } 305b49de8d1SLois Curfman McInnes else { 306905e6a2fSBarry Smith if (!aij->colmap) { 307905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 308905e6a2fSBarry Smith } 309905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 310e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311d9d09a02SSatish Balay else { 312b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 313b49de8d1SLois Curfman McInnes } 314b49de8d1SLois Curfman McInnes } 315b49de8d1SLois Curfman McInnes } 316d9d09a02SSatish Balay } 317b49de8d1SLois Curfman McInnes else { 318e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 319b49de8d1SLois Curfman McInnes } 320b49de8d1SLois Curfman McInnes } 321b49de8d1SLois Curfman McInnes return 0; 322b49de8d1SLois Curfman McInnes } 323b49de8d1SLois Curfman McInnes 3245615d1e5SSatish Balay #undef __FUNC__ 3255615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 3268f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 3278a729477SBarry Smith { 32844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 329d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 33017699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 33117699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 3321eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3336abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 3341eb62cbbSBarry Smith InsertMode addv; 3351eb62cbbSBarry Smith Scalar *rvalues,*svalues; 3361eb62cbbSBarry Smith 3371eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 33847794344SBarry Smith MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 339dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 340e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 3411eb62cbbSBarry Smith } 34247794344SBarry Smith mat->insertmode = addv; /* in case this processor had no cache */ 3431eb62cbbSBarry Smith 3441eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3450452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 346cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3470452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 3481eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3491eb62cbbSBarry Smith idx = aij->stash.idx[i]; 35017699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3511eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3521eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 3538a729477SBarry Smith } 3548a729477SBarry Smith } 3558a729477SBarry Smith } 35617699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3571eb62cbbSBarry Smith 3581eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3590452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 36017699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 36117699dbbSLois Curfman McInnes nreceives = work[rank]; 36217699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 36317699dbbSLois Curfman McInnes nmax = work[rank]; 3640452661fSBarry Smith PetscFree(work); 3651eb62cbbSBarry Smith 3661eb62cbbSBarry Smith /* post receives: 3671eb62cbbSBarry Smith 1) each message will consist of ordered pairs 3681eb62cbbSBarry Smith (global index,value) we store the global index as a double 369d6dfbf8fSBarry Smith to simplify the message passing. 3701eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 3711eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 3721eb62cbbSBarry Smith this is a lot of wasted space. 3731eb62cbbSBarry Smith 3741eb62cbbSBarry Smith 3751eb62cbbSBarry Smith This could be done better. 3761eb62cbbSBarry Smith */ 3770452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 37878b31e54SBarry Smith CHKPTRQ(rvalues); 3790452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 38078b31e54SBarry Smith CHKPTRQ(recv_waits); 3811eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 382416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 3831eb62cbbSBarry Smith comm,recv_waits+i); 3841eb62cbbSBarry Smith } 3851eb62cbbSBarry Smith 3861eb62cbbSBarry Smith /* do sends: 3871eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3881eb62cbbSBarry Smith the ith processor 3891eb62cbbSBarry Smith */ 3900452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 3910452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 39278b31e54SBarry Smith CHKPTRQ(send_waits); 3930452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 3941eb62cbbSBarry Smith starts[0] = 0; 39517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3961eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 3971eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 3981eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 3991eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 4001eb62cbbSBarry Smith } 4010452661fSBarry Smith PetscFree(owner); 4021eb62cbbSBarry Smith starts[0] = 0; 40317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4041eb62cbbSBarry Smith count = 0; 40517699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 4061eb62cbbSBarry Smith if (procs[i]) { 407416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 4081eb62cbbSBarry Smith comm,send_waits+count++); 4091eb62cbbSBarry Smith } 4101eb62cbbSBarry Smith } 4110452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 4121eb62cbbSBarry Smith 4131eb62cbbSBarry Smith /* Free cache space */ 41455908cccSLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 41578b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 4161eb62cbbSBarry Smith 4171eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 4181eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 4191eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 4201eb62cbbSBarry Smith aij->rmax = nmax; 4211eb62cbbSBarry Smith 4221eb62cbbSBarry Smith return 0; 4231eb62cbbSBarry Smith } 42444a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 4251eb62cbbSBarry Smith 4265615d1e5SSatish Balay #undef __FUNC__ 4275615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 4288f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 4291eb62cbbSBarry Smith { 43044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4311eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 432416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 433905e6a2fSBarry Smith int row,col,other_disassembled; 4341eb62cbbSBarry Smith Scalar *values,val; 43547794344SBarry Smith InsertMode addv = mat->insertmode; 4361eb62cbbSBarry Smith 4371eb62cbbSBarry Smith /* wait on receives */ 4381eb62cbbSBarry Smith while (count) { 439d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 4401eb62cbbSBarry Smith /* unpack receives into our local space */ 441d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 442c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 4431eb62cbbSBarry Smith n = n/3; 4441eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 445227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 446227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 4471eb62cbbSBarry Smith val = values[3*i+2]; 4481eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 4491eb62cbbSBarry Smith col -= aij->cstart; 4506fd7127cSSatish Balay ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4511eb62cbbSBarry Smith } 4521eb62cbbSBarry Smith else { 45355a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 454905e6a2fSBarry Smith if (!aij->colmap) { 455905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 456905e6a2fSBarry Smith } 457905e6a2fSBarry Smith col = aij->colmap[col] - 1; 458ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4592493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 461d6dfbf8fSBarry Smith } 4629e25ed09SBarry Smith } 4636fd7127cSSatish Balay ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 4641eb62cbbSBarry Smith } 4651eb62cbbSBarry Smith } 4661eb62cbbSBarry Smith count--; 4671eb62cbbSBarry Smith } 4680452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 4691eb62cbbSBarry Smith 4701eb62cbbSBarry Smith /* wait on sends */ 4711eb62cbbSBarry Smith if (aij->nsends) { 4720a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 4731eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 4740452661fSBarry Smith PetscFree(send_status); 4751eb62cbbSBarry Smith } 4760452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 4771eb62cbbSBarry Smith 47878b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 47978b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 4801eb62cbbSBarry Smith 4812493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 4822493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 483227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 484227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 4852493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 4862493cbb0SBarry Smith } 4872493cbb0SBarry Smith 4886d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 48978b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 4905e42470aSBarry Smith } 49178b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 49278b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 4935e42470aSBarry Smith 4947a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 4958a729477SBarry Smith return 0; 4968a729477SBarry Smith } 4978a729477SBarry Smith 4985615d1e5SSatish Balay #undef __FUNC__ 4995615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 5008f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A) 5011eb62cbbSBarry Smith { 50244a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 503dbd7a890SLois Curfman McInnes int ierr; 50478b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 50578b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 5061eb62cbbSBarry Smith return 0; 5071eb62cbbSBarry Smith } 5081eb62cbbSBarry Smith 5091eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 5101eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 5111eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 5121eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 5131eb62cbbSBarry Smith routine. 5141eb62cbbSBarry Smith */ 5155615d1e5SSatish Balay #undef __FUNC__ 5165615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 5178f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 5181eb62cbbSBarry Smith { 51944a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 52017699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 5216abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 52217699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 5235392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 524d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 525d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 5261eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 5271eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 5281eb62cbbSBarry Smith IS istmp; 5291eb62cbbSBarry Smith 53077c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 53178b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 5321eb62cbbSBarry Smith 5331eb62cbbSBarry Smith /* first count number of contributors to each processor */ 5340452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 535cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 5360452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 5371eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5381eb62cbbSBarry Smith idx = rows[i]; 5391eb62cbbSBarry Smith found = 0; 54017699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 5411eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 5421eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 5431eb62cbbSBarry Smith } 5441eb62cbbSBarry Smith } 545e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 5461eb62cbbSBarry Smith } 54717699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 5481eb62cbbSBarry Smith 5491eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 5500452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 55117699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 55217699dbbSLois Curfman McInnes nrecvs = work[rank]; 55317699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 55417699dbbSLois Curfman McInnes nmax = work[rank]; 5550452661fSBarry Smith PetscFree(work); 5561eb62cbbSBarry Smith 5571eb62cbbSBarry Smith /* post receives: */ 5580452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 55978b31e54SBarry Smith CHKPTRQ(rvalues); 5600452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 56178b31e54SBarry Smith CHKPTRQ(recv_waits); 5621eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 563416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 5641eb62cbbSBarry Smith } 5651eb62cbbSBarry Smith 5661eb62cbbSBarry Smith /* do sends: 5671eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 5681eb62cbbSBarry Smith the ith processor 5691eb62cbbSBarry Smith */ 5700452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 5710452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 57278b31e54SBarry Smith CHKPTRQ(send_waits); 5730452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 5741eb62cbbSBarry Smith starts[0] = 0; 57517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5761eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 5771eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 5781eb62cbbSBarry Smith } 5791eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 5801eb62cbbSBarry Smith 5811eb62cbbSBarry Smith starts[0] = 0; 58217699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 5831eb62cbbSBarry Smith count = 0; 58417699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 5851eb62cbbSBarry Smith if (procs[i]) { 586416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 5871eb62cbbSBarry Smith } 5881eb62cbbSBarry Smith } 5890452661fSBarry Smith PetscFree(starts); 5901eb62cbbSBarry Smith 59117699dbbSLois Curfman McInnes base = owners[rank]; 5921eb62cbbSBarry Smith 5931eb62cbbSBarry Smith /* wait on receives */ 5940452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 5951eb62cbbSBarry Smith source = lens + nrecvs; 5961eb62cbbSBarry Smith count = nrecvs; slen = 0; 5971eb62cbbSBarry Smith while (count) { 598d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 5991eb62cbbSBarry Smith /* unpack receives into our local space */ 6001eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 601d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 602d6dfbf8fSBarry Smith lens[imdex] = n; 6031eb62cbbSBarry Smith slen += n; 6041eb62cbbSBarry Smith count--; 6051eb62cbbSBarry Smith } 6060452661fSBarry Smith PetscFree(recv_waits); 6071eb62cbbSBarry Smith 6081eb62cbbSBarry Smith /* move the data into the send scatter */ 6090452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 6101eb62cbbSBarry Smith count = 0; 6111eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 6121eb62cbbSBarry Smith values = rvalues + i*nmax; 6131eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 6141eb62cbbSBarry Smith lrows[count++] = values[j] - base; 6151eb62cbbSBarry Smith } 6161eb62cbbSBarry Smith } 6170452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 6180452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 6191eb62cbbSBarry Smith 6201eb62cbbSBarry Smith /* actually zap the local rows */ 621029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 622464493b3SBarry Smith PLogObjectParent(A,istmp); 6230452661fSBarry Smith PetscFree(lrows); 62478b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 62578b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 62678b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 6271eb62cbbSBarry Smith 6281eb62cbbSBarry Smith /* wait on sends */ 6291eb62cbbSBarry Smith if (nsends) { 6300452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 63178b31e54SBarry Smith CHKPTRQ(send_status); 6321eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 6330452661fSBarry Smith PetscFree(send_status); 6341eb62cbbSBarry Smith } 6350452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 6361eb62cbbSBarry Smith 6371eb62cbbSBarry Smith return 0; 6381eb62cbbSBarry Smith } 6391eb62cbbSBarry Smith 6405615d1e5SSatish Balay #undef __FUNC__ 6415615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 6428f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 6431eb62cbbSBarry Smith { 644416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 645fbd6ef76SBarry Smith int ierr,nt; 646416022c9SBarry Smith 647a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 648fbd6ef76SBarry Smith if (nt != a->n) { 649f40265daSLois Curfman McInnes SETERRQ(1,0,"Incompatible partition of A and xx"); 650fbd6ef76SBarry Smith } 65143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 65238597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 65343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 65438597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 6551eb62cbbSBarry Smith return 0; 6561eb62cbbSBarry Smith } 6571eb62cbbSBarry Smith 6585615d1e5SSatish Balay #undef __FUNC__ 6595615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 6608f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 661da3a660dSBarry Smith { 662416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 663da3a660dSBarry Smith int ierr; 66443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 66538597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 66643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 66738597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 668da3a660dSBarry Smith return 0; 669da3a660dSBarry Smith } 670da3a660dSBarry Smith 6715615d1e5SSatish Balay #undef __FUNC__ 6725615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 6738f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 674da3a660dSBarry Smith { 675416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 676da3a660dSBarry Smith int ierr; 677da3a660dSBarry Smith 678da3a660dSBarry Smith /* do nondiagonal part */ 67938597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 680da3a660dSBarry Smith /* send it on its way */ 681537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 682da3a660dSBarry Smith /* do local part */ 68338597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 684da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 685da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 686da3a660dSBarry Smith /* but is not perhaps always true. */ 687537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 688da3a660dSBarry Smith return 0; 689da3a660dSBarry Smith } 690da3a660dSBarry Smith 6915615d1e5SSatish Balay #undef __FUNC__ 6925615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 6938f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 694da3a660dSBarry Smith { 695416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 696da3a660dSBarry Smith int ierr; 697da3a660dSBarry Smith 698da3a660dSBarry Smith /* do nondiagonal part */ 69938597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 700da3a660dSBarry Smith /* send it on its way */ 701537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 702da3a660dSBarry Smith /* do local part */ 70338597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 704da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 705da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 706da3a660dSBarry Smith /* but is not perhaps always true. */ 7070a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 708da3a660dSBarry Smith return 0; 709da3a660dSBarry Smith } 710da3a660dSBarry Smith 7111eb62cbbSBarry Smith /* 7121eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 7131eb62cbbSBarry Smith diagonal block 7141eb62cbbSBarry Smith */ 7155615d1e5SSatish Balay #undef __FUNC__ 7165615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 7178f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 7181eb62cbbSBarry Smith { 719416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 72044cd7ae7SLois Curfman McInnes if (a->M != a->N) 721e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 7225baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 723e3372554SBarry Smith SETERRQ(1,0,"row partition must equal col partition"); } 724416022c9SBarry Smith return MatGetDiagonal(a->A,v); 7251eb62cbbSBarry Smith } 7261eb62cbbSBarry Smith 7275615d1e5SSatish Balay #undef __FUNC__ 7285615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 7298f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A) 730052efed2SBarry Smith { 731052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 732052efed2SBarry Smith int ierr; 733052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 734052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 735052efed2SBarry Smith return 0; 736052efed2SBarry Smith } 737052efed2SBarry Smith 7385615d1e5SSatish Balay #undef __FUNC__ 7395eea60f9SBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 7408f6be9afSLois Curfman McInnes int MatDestroy_MPIAIJ(PetscObject obj) 7411eb62cbbSBarry Smith { 7421eb62cbbSBarry Smith Mat mat = (Mat) obj; 74344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 7441eb62cbbSBarry Smith int ierr; 745a5a9c739SBarry Smith #if defined(PETSC_LOG) 7466e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 747a5a9c739SBarry Smith #endif 7480452661fSBarry Smith PetscFree(aij->rowners); 74978b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 75078b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 7510452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 7520452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 7531eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 754a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 7557a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 7560452661fSBarry Smith PetscFree(aij); 75790f02eecSBarry Smith if (mat->mapping) { 75890f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 75990f02eecSBarry Smith } 760a5a9c739SBarry Smith PLogObjectDestroy(mat); 7610452661fSBarry Smith PetscHeaderDestroy(mat); 7621eb62cbbSBarry Smith return 0; 7631eb62cbbSBarry Smith } 764ee50ffe9SBarry Smith 7655615d1e5SSatish Balay #undef __FUNC__ 7665eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 7678f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 7681eb62cbbSBarry Smith { 769416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 770416022c9SBarry Smith int ierr; 771416022c9SBarry Smith 77217699dbbSLois Curfman McInnes if (aij->size == 1) { 773416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 774416022c9SBarry Smith } 775e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 776416022c9SBarry Smith return 0; 777416022c9SBarry Smith } 778416022c9SBarry Smith 7795615d1e5SSatish Balay #undef __FUNC__ 7805eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 7818f6be9afSLois Curfman McInnes extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 782416022c9SBarry Smith { 78344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 784dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 785a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 786d636dbe3SBarry Smith FILE *fd; 78719bcc07fSBarry Smith ViewerType vtype; 788416022c9SBarry Smith 78919bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 79019bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 79190ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 7920a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7934e220ebcSLois Curfman McInnes MatInfo info; 7944e220ebcSLois Curfman McInnes int flg; 795a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 79690ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7974e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 79895e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 79977c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 80095e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 8014e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 80295e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 8034e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 8044e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 8054e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 8064e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 8074e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 808a56f8943SBarry Smith fflush(fd); 80977c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 810a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 811416022c9SBarry Smith return 0; 812416022c9SBarry Smith } 8130a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 81408480c60SBarry Smith return 0; 81508480c60SBarry Smith } 816416022c9SBarry Smith } 817416022c9SBarry Smith 81819bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 81919bcc07fSBarry Smith Draw draw; 82019bcc07fSBarry Smith PetscTruth isnull; 82119bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 82219bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 82319bcc07fSBarry Smith } 82419bcc07fSBarry Smith 82519bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 82690ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 82777c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 828d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 82917699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 8301eb62cbbSBarry Smith aij->cend); 83178b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 83278b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 833d13ab20cSBarry Smith fflush(fd); 83477c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 835d13ab20cSBarry Smith } 836416022c9SBarry Smith else { 837a56f8943SBarry Smith int size = aij->size; 838a56f8943SBarry Smith rank = aij->rank; 83917699dbbSLois Curfman McInnes if (size == 1) { 84078b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 84195373324SBarry Smith } 84295373324SBarry Smith else { 84395373324SBarry Smith /* assemble the entire matrix onto first processor. */ 84495373324SBarry Smith Mat A; 845ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 8462eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 84795373324SBarry Smith Scalar *a; 8482ee70a88SLois Curfman McInnes 84917699dbbSLois Curfman McInnes if (!rank) { 850b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 851c451ab25SLois Curfman McInnes CHKERRQ(ierr); 85295373324SBarry Smith } 85395373324SBarry Smith else { 854b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 855c451ab25SLois Curfman McInnes CHKERRQ(ierr); 85695373324SBarry Smith } 857464493b3SBarry Smith PLogObjectParent(mat,A); 858416022c9SBarry Smith 85995373324SBarry Smith /* copy over the A part */ 860ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 8612ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 86295373324SBarry Smith row = aij->rstart; 863dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 86495373324SBarry Smith for ( i=0; i<m; i++ ) { 865416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 86695373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 86795373324SBarry Smith } 8682ee70a88SLois Curfman McInnes aj = Aloc->j; 869dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 87095373324SBarry Smith 87195373324SBarry Smith /* copy over the B part */ 872ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 8732ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 87495373324SBarry Smith row = aij->rstart; 8750452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 876dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 87795373324SBarry Smith for ( i=0; i<m; i++ ) { 878416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 87995373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 88095373324SBarry Smith } 8810452661fSBarry Smith PetscFree(ct); 8826d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8836d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 88417699dbbSLois Curfman McInnes if (!rank) { 88578b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 88695373324SBarry Smith } 88778b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 88895373324SBarry Smith } 88995373324SBarry Smith } 8901eb62cbbSBarry Smith return 0; 8911eb62cbbSBarry Smith } 8921eb62cbbSBarry Smith 8935615d1e5SSatish Balay #undef __FUNC__ 8945eea60f9SBarry Smith #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */ 8958f6be9afSLois Curfman McInnes int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 896416022c9SBarry Smith { 897416022c9SBarry Smith Mat mat = (Mat) obj; 898416022c9SBarry Smith int ierr; 89919bcc07fSBarry Smith ViewerType vtype; 900416022c9SBarry Smith 90119bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 90219bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 90319bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 904d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 905416022c9SBarry Smith } 90619bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 907416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 908416022c9SBarry Smith } 909416022c9SBarry Smith return 0; 910416022c9SBarry Smith } 911416022c9SBarry Smith 9121eb62cbbSBarry Smith /* 9131eb62cbbSBarry Smith This has to provide several versions. 9141eb62cbbSBarry Smith 9151eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 9161eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 917d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 9181eb62cbbSBarry Smith */ 9195615d1e5SSatish Balay #undef __FUNC__ 9205615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 9218f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 922dbb450caSBarry Smith double fshift,int its,Vec xx) 9238a729477SBarry Smith { 92444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 925d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 926ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 927c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 9286abc6512SBarry Smith int ierr,*idx, *diag; 929416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 9308a729477SBarry Smith 931d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 932dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 933dbb450caSBarry Smith ls = ls + shift; 934ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 935d6dfbf8fSBarry Smith diag = A->diag; 936c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 937da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 93838597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 939da3a660dSBarry Smith } 94043a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 94178b31e54SBarry Smith CHKERRQ(ierr); 94243a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 94378b31e54SBarry Smith CHKERRQ(ierr); 944d6dfbf8fSBarry Smith while (its--) { 945d6dfbf8fSBarry Smith /* go down through the rows */ 946d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 947d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 948dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 949dbb450caSBarry Smith v = A->a + A->i[i] + shift; 950d6dfbf8fSBarry Smith sum = b[i]; 951d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 952dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 953d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 954dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 955dbb450caSBarry Smith v = B->a + B->i[i] + shift; 956d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 95755a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 958d6dfbf8fSBarry Smith } 959d6dfbf8fSBarry Smith /* come up through the rows */ 960d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 961d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 962dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 963dbb450caSBarry Smith v = A->a + A->i[i] + shift; 964d6dfbf8fSBarry Smith sum = b[i]; 965d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 966dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 967d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 968dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 969dbb450caSBarry Smith v = B->a + B->i[i] + shift; 970d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 97155a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 972d6dfbf8fSBarry Smith } 973d6dfbf8fSBarry Smith } 974d6dfbf8fSBarry Smith } 975d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 976da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 97738597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 978da3a660dSBarry Smith } 97943a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 98078b31e54SBarry Smith CHKERRQ(ierr); 98143a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 98278b31e54SBarry Smith CHKERRQ(ierr); 983d6dfbf8fSBarry Smith while (its--) { 984d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 985d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 986dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 987dbb450caSBarry Smith v = A->a + A->i[i] + shift; 988d6dfbf8fSBarry Smith sum = b[i]; 989d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 990dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 991d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 992dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 993dbb450caSBarry Smith v = B->a + B->i[i] + shift; 994d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 99555a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 996d6dfbf8fSBarry Smith } 997d6dfbf8fSBarry Smith } 998d6dfbf8fSBarry Smith } 999d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1000da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 100138597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 1002da3a660dSBarry Smith } 100343a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 100478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 100543a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 100678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 1007d6dfbf8fSBarry Smith while (its--) { 1008d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 1009d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 1010dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 1011dbb450caSBarry Smith v = A->a + A->i[i] + shift; 1012d6dfbf8fSBarry Smith sum = b[i]; 1013d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 1014dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1015d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1016dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1017dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1018d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 101955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1020d6dfbf8fSBarry Smith } 1021d6dfbf8fSBarry Smith } 1022d6dfbf8fSBarry Smith } 1023c16cb8f2SBarry Smith else { 1024e3372554SBarry Smith SETERRQ(1,0,"Option not supported"); 1025c16cb8f2SBarry Smith } 10268a729477SBarry Smith return 0; 10278a729477SBarry Smith } 1028a66be287SLois Curfman McInnes 10295615d1e5SSatish Balay #undef __FUNC__ 10305eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */ 10318f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1032a66be287SLois Curfman McInnes { 1033a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1034a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 10354e220ebcSLois Curfman McInnes int ierr; 10364e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 1037a66be287SLois Curfman McInnes 10384e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 10394e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 10404e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 10414e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 10424e220ebcSLois Curfman McInnes info->block_size = 1.0; 10434e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 10444e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 10454e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 10464e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 10474e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 10484e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 1049a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 10504e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 10514e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 10524e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 10534e220ebcSLois Curfman McInnes info->memory = isend[3]; 10544e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1055a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 10564e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 10574e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10584e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10594e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10604e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10614e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1062a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 10634e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 10644e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10654e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10664e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10674e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10684e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1069a66be287SLois Curfman McInnes } 10704e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 10714e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 10724e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 10734e220ebcSLois Curfman McInnes 1074a66be287SLois Curfman McInnes return 0; 1075a66be287SLois Curfman McInnes } 1076a66be287SLois Curfman McInnes 1077299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1078299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1079299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1080299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1081299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1082299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1083299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1084299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1085299609e3SLois Curfman McInnes 10865615d1e5SSatish Balay #undef __FUNC__ 10875eea60f9SBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */ 10888f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op) 1089c74985f6SBarry Smith { 1090c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1091c74985f6SBarry Smith 10926d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10936d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10946da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1095c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 109696854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1097c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1098b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1099b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1100b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1101aeafbbfcSLois Curfman McInnes a->roworiented = 1; 1102c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1103c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1104b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 11056da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 11066d4a8577SBarry Smith op == MAT_SYMMETRIC || 11076d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 11086d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 110994a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 11106d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 11114b0e389bSBarry Smith a->roworiented = 0; 11124b0e389bSBarry Smith MatSetOption(a->A,op); 11134b0e389bSBarry Smith MatSetOption(a->B,op); 11142b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 111590f02eecSBarry Smith a->donotstash = 1; 111690f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1117e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1118c0bbcb79SLois Curfman McInnes else 1119e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1120c74985f6SBarry Smith return 0; 1121c74985f6SBarry Smith } 1122c74985f6SBarry Smith 11235615d1e5SSatish Balay #undef __FUNC__ 11245eea60f9SBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */ 11258f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1126c74985f6SBarry Smith { 112744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1128c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1129c74985f6SBarry Smith return 0; 1130c74985f6SBarry Smith } 1131c74985f6SBarry Smith 11325615d1e5SSatish Balay #undef __FUNC__ 11335eea60f9SBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */ 11348f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1135c74985f6SBarry Smith { 113644a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1137b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1138c74985f6SBarry Smith return 0; 1139c74985f6SBarry Smith } 1140c74985f6SBarry Smith 11415615d1e5SSatish Balay #undef __FUNC__ 11425eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */ 11438f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1144c74985f6SBarry Smith { 114544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1146c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1147c74985f6SBarry Smith return 0; 1148c74985f6SBarry Smith } 1149c74985f6SBarry Smith 11506d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11516d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11526d84be18SBarry Smith 11535615d1e5SSatish Balay #undef __FUNC__ 11545615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 11556d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 115639e00950SLois Curfman McInnes { 1157154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 115870f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1159154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1160154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 116170f0671dSBarry Smith int *cmap, *idx_p; 116239e00950SLois Curfman McInnes 1163e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 11647a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 11657a0afa10SBarry Smith 116670f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 11677a0afa10SBarry Smith /* 11687a0afa10SBarry Smith allocate enough space to hold information from the longest row. 11697a0afa10SBarry Smith */ 11707a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1171c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1172c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 11737a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 11747a0afa10SBarry Smith if (max < tmp) { max = tmp; } 11757a0afa10SBarry Smith } 11767a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 11777a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 11787a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 11797a0afa10SBarry Smith } 11807a0afa10SBarry Smith 1181e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1182abc0e9e4SLois Curfman McInnes lrow = row - rstart; 118339e00950SLois Curfman McInnes 1184154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1185154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1186154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 118738597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 118838597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1189154123eaSLois Curfman McInnes nztot = nzA + nzB; 1190154123eaSLois Curfman McInnes 119170f0671dSBarry Smith cmap = mat->garray; 1192154123eaSLois Curfman McInnes if (v || idx) { 1193154123eaSLois Curfman McInnes if (nztot) { 1194154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 119570f0671dSBarry Smith int imark = -1; 1196154123eaSLois Curfman McInnes if (v) { 119770f0671dSBarry Smith *v = v_p = mat->rowvalues; 119839e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 119970f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1200154123eaSLois Curfman McInnes else break; 1201154123eaSLois Curfman McInnes } 1202154123eaSLois Curfman McInnes imark = i; 120370f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 120470f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1205154123eaSLois Curfman McInnes } 1206154123eaSLois Curfman McInnes if (idx) { 120770f0671dSBarry Smith *idx = idx_p = mat->rowindices; 120870f0671dSBarry Smith if (imark > -1) { 120970f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 121070f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 121170f0671dSBarry Smith } 121270f0671dSBarry Smith } else { 1213154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 121470f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1215154123eaSLois Curfman McInnes else break; 1216154123eaSLois Curfman McInnes } 1217154123eaSLois Curfman McInnes imark = i; 121870f0671dSBarry Smith } 121970f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 122070f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 122139e00950SLois Curfman McInnes } 122239e00950SLois Curfman McInnes } 12231ca473b0SSatish Balay else { 12241ca473b0SSatish Balay if (idx) *idx = 0; 12251ca473b0SSatish Balay if (v) *v = 0; 12261ca473b0SSatish Balay } 1227154123eaSLois Curfman McInnes } 122839e00950SLois Curfman McInnes *nz = nztot; 122938597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 123038597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 123139e00950SLois Curfman McInnes return 0; 123239e00950SLois Curfman McInnes } 123339e00950SLois Curfman McInnes 12345615d1e5SSatish Balay #undef __FUNC__ 12355eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */ 12366d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 123739e00950SLois Curfman McInnes { 12387a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 12397a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1240e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 12417a0afa10SBarry Smith } 12427a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 124339e00950SLois Curfman McInnes return 0; 124439e00950SLois Curfman McInnes } 124539e00950SLois Curfman McInnes 12465615d1e5SSatish Balay #undef __FUNC__ 12475615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 12488f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1249855ac2c5SLois Curfman McInnes { 1250855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1251ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1252416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1253416022c9SBarry Smith double sum = 0.0; 125404ca555eSLois Curfman McInnes Scalar *v; 125504ca555eSLois Curfman McInnes 125617699dbbSLois Curfman McInnes if (aij->size == 1) { 125714183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 125837fa93a5SLois Curfman McInnes } else { 125904ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 126004ca555eSLois Curfman McInnes v = amat->a; 126104ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 126204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 126304ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 126404ca555eSLois Curfman McInnes #else 126504ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 126604ca555eSLois Curfman McInnes #endif 126704ca555eSLois Curfman McInnes } 126804ca555eSLois Curfman McInnes v = bmat->a; 126904ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 127004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 127104ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 127204ca555eSLois Curfman McInnes #else 127304ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 127404ca555eSLois Curfman McInnes #endif 127504ca555eSLois Curfman McInnes } 12766d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 127704ca555eSLois Curfman McInnes *norm = sqrt(*norm); 127804ca555eSLois Curfman McInnes } 127904ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 128004ca555eSLois Curfman McInnes double *tmp, *tmp2; 128104ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 1282758f045eSSatish Balay tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1283758f045eSSatish Balay tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1284cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 128504ca555eSLois Curfman McInnes *norm = 0.0; 128604ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 128704ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1288579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 128904ca555eSLois Curfman McInnes } 129004ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 129104ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1292579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 129304ca555eSLois Curfman McInnes } 12946d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 129504ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 129604ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 129704ca555eSLois Curfman McInnes } 12980452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 129904ca555eSLois Curfman McInnes } 130004ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1301515d9167SLois Curfman McInnes double ntemp = 0.0; 130204ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1303dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 130404ca555eSLois Curfman McInnes sum = 0.0; 130504ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1306cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 130704ca555eSLois Curfman McInnes } 1308dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 130904ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1310cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 131104ca555eSLois Curfman McInnes } 1312515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 131304ca555eSLois Curfman McInnes } 13146d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 131504ca555eSLois Curfman McInnes } 131604ca555eSLois Curfman McInnes else { 1317e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 131804ca555eSLois Curfman McInnes } 131937fa93a5SLois Curfman McInnes } 1320855ac2c5SLois Curfman McInnes return 0; 1321855ac2c5SLois Curfman McInnes } 1322855ac2c5SLois Curfman McInnes 13235615d1e5SSatish Balay #undef __FUNC__ 13245615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 13258f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1326b7c46309SBarry Smith { 1327b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1328dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1329416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1330416022c9SBarry Smith Mat B; 1331b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1332b7c46309SBarry Smith Scalar *array; 1333b7c46309SBarry Smith 13343638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 1335e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 1336b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1337b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1338b7c46309SBarry Smith 1339b7c46309SBarry Smith /* copy over the A part */ 1340ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1341b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1342b7c46309SBarry Smith row = a->rstart; 1343dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1344b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1345416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1346b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1347b7c46309SBarry Smith } 1348b7c46309SBarry Smith aj = Aloc->j; 13494af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1350b7c46309SBarry Smith 1351b7c46309SBarry Smith /* copy over the B part */ 1352ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1353b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1354b7c46309SBarry Smith row = a->rstart; 13550452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1356dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1357b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1358416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1359b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1360b7c46309SBarry Smith } 13610452661fSBarry Smith PetscFree(ct); 13626d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13636d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13643638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 13650de55854SLois Curfman McInnes *matout = B; 13660de55854SLois Curfman McInnes } else { 13670de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 13680452661fSBarry Smith PetscFree(a->rowners); 13690de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 13700de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 13710452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 13720452661fSBarry Smith if (a->garray) PetscFree(a->garray); 13730de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1374a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 13750452661fSBarry Smith PetscFree(a); 1376f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 13770452661fSBarry Smith PetscHeaderDestroy(B); 13780de55854SLois Curfman McInnes } 1379b7c46309SBarry Smith return 0; 1380b7c46309SBarry Smith } 1381b7c46309SBarry Smith 13825615d1e5SSatish Balay #undef __FUNC__ 13835615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 13844b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1385a008b906SSatish Balay { 13864b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13874b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1388a008b906SSatish Balay int ierr,s1,s2,s3; 1389a008b906SSatish Balay 13904b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 13914b967eb1SSatish Balay if (rr) { 13924b967eb1SSatish Balay s3 = aij->n; 13934b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 1394e3372554SBarry Smith if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 13954b967eb1SSatish Balay /* Overlap communication with computation. */ 139643a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1397a008b906SSatish Balay } 13984b967eb1SSatish Balay if (ll) { 13994b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 1400e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1401c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 14024b967eb1SSatish Balay } 14034b967eb1SSatish Balay /* scale the diagonal block */ 1404c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 14054b967eb1SSatish Balay 14064b967eb1SSatish Balay if (rr) { 14074b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 140843a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1409c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 14104b967eb1SSatish Balay } 14114b967eb1SSatish Balay 1412a008b906SSatish Balay return 0; 1413a008b906SSatish Balay } 1414a008b906SSatish Balay 1415a008b906SSatish Balay 1416682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 14175615d1e5SSatish Balay #undef __FUNC__ 14185eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */ 14198f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A) 1420682d7d0cSBarry Smith { 1421682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1422682d7d0cSBarry Smith 1423682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1424682d7d0cSBarry Smith else return 0; 1425682d7d0cSBarry Smith } 1426682d7d0cSBarry Smith 14275615d1e5SSatish Balay #undef __FUNC__ 14285eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */ 14298f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 14305a838052SSatish Balay { 14315a838052SSatish Balay *bs = 1; 14325a838052SSatish Balay return 0; 14335a838052SSatish Balay } 14345615d1e5SSatish Balay #undef __FUNC__ 14355eea60f9SBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */ 14368f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A) 1437bb5a7306SBarry Smith { 1438bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1439bb5a7306SBarry Smith int ierr; 1440bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1441bb5a7306SBarry Smith return 0; 1442bb5a7306SBarry Smith } 1443bb5a7306SBarry Smith 14448f6be9afSLois Curfman McInnes extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 14452f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 14460a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 14470a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 14488a729477SBarry Smith /* -------------------------------------------------------------------*/ 14492ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 145039e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 145144a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 145244a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 145336ce4990SBarry Smith 0,0, 145436ce4990SBarry Smith 0,0, 145536ce4990SBarry Smith 0,0, 145644a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1457b7c46309SBarry Smith MatTranspose_MPIAIJ, 1458a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1459a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1460ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 14611eb62cbbSBarry Smith 0, 1462299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 146336ce4990SBarry Smith 0,0,0,0, 1464d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 146536ce4990SBarry Smith 0,0, 146694a9d846SBarry Smith 0,0,MatConvertSameType_MPIAIJ,0,0, 1467b49de8d1SLois Curfman McInnes 0,0,0, 1468598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1469052efed2SBarry Smith MatPrintHelp_MPIAIJ, 14703b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 14710a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 1472bb5a7306SBarry Smith MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 147336ce4990SBarry Smith 14748a729477SBarry Smith 14755615d1e5SSatish Balay #undef __FUNC__ 14765615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 14771987afe7SBarry Smith /*@C 1478ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 14793a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 14803a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 14813a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 14823a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 14838a729477SBarry Smith 14848a729477SBarry Smith Input Parameters: 14851eb62cbbSBarry Smith . comm - MPI communicator 14867d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 148792e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 148892e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 14891a3896d6SBarry Smith . n - This value should be the same as the local size used in creating the 14901a3896d6SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 14911a3896d6SBarry Smith calculated if N is given) For square matrices n is almost always m. 14927d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 149392e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1494ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1495ff756334SLois Curfman McInnes (same for all local rows) 14962bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 149792e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 14982bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 14992bd5e0b2SLois Curfman McInnes it is zero. 15002bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1501ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 15022bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 15032bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 15042bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 15058a729477SBarry Smith 1506ff756334SLois Curfman McInnes Output Parameter: 150744cd7ae7SLois Curfman McInnes . A - the matrix 15088a729477SBarry Smith 1509ff756334SLois Curfman McInnes Notes: 1510ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1511ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 15120002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 15130002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1514ff756334SLois Curfman McInnes 1515ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1516ff756334SLois Curfman McInnes (possibly both). 1517ff756334SLois Curfman McInnes 15185511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 15195511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 15205511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 15215511cfe3SLois Curfman McInnes 15225511cfe3SLois Curfman McInnes Options Database Keys: 15235511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 15246e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 15256e62573dSLois Curfman McInnes $ (max limit=5) 15266e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 15276e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 15286e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 15295511cfe3SLois Curfman McInnes 1530e0245417SLois Curfman McInnes Storage Information: 1531e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1532e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1533e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1534e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1535e0245417SLois Curfman McInnes 1536e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 15375ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 15385ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 15395ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 15405ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1541ff756334SLois Curfman McInnes 15425511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 15435511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 15442191d07cSBarry Smith 1545b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1546b810aeb4SBarry Smith $ ------------------- 15475511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 15485511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 15495511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 15505511cfe3SLois Curfman McInnes $ ------------------- 1551b810aeb4SBarry Smith $ 1552b810aeb4SBarry Smith 15535511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 15545511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 15555511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 15565511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 15575511cfe3SLois Curfman McInnes 15585511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 15595511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 15605511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 15613d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 156292e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15636da5968aSLois Curfman McInnes matrices. 15643a511b96SLois Curfman McInnes 1565dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1566ff756334SLois Curfman McInnes 1567fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 15688a729477SBarry Smith @*/ 15691eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 157044cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 15718a729477SBarry Smith { 157244cd7ae7SLois Curfman McInnes Mat B; 157344cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 157436ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1575416022c9SBarry Smith 157644cd7ae7SLois Curfman McInnes *A = 0; 1577f09e8eb9SSatish Balay PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm); 157844cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 157944cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 158044cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 158144cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 158236ce4990SBarry Smith MPI_Comm_size(comm,&size); 158336ce4990SBarry Smith if (size == 1) { 158436ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 158536ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 158636ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 158736ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 158836ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 158936ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 159036ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 159136ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 159236ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 159336ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 159436ce4990SBarry Smith } 159544cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 159644cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 159744cd7ae7SLois Curfman McInnes B->factor = 0; 159844cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 159990f02eecSBarry Smith B->mapping = 0; 1600d6dfbf8fSBarry Smith 160147794344SBarry Smith B->insertmode = NOT_SET_VALUES; 160244cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 160344cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 16041eb62cbbSBarry Smith 1605b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1606e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 16071987afe7SBarry Smith 1608dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 16091eb62cbbSBarry Smith work[0] = m; work[1] = n; 1610d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1611dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1612dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 16131eb62cbbSBarry Smith } 161444cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 161544cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 161644cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 161744cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 161844cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 161944cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 16201eb62cbbSBarry Smith 16211eb62cbbSBarry Smith /* build local table of row and column ownerships */ 162244cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1623f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1624603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 162544cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 162644cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 162744cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 162844cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 16298a729477SBarry Smith } 163044cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 163144cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 163244cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 163344cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 163444cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 163544cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 16368a729477SBarry Smith } 163744cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 163844cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 16398a729477SBarry Smith 16405ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1641029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 164244cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 16437b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1644029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 164544cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 16468a729477SBarry Smith 16471eb62cbbSBarry Smith /* build cache for off array entries formed */ 164844cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 164990f02eecSBarry Smith b->donotstash = 0; 165044cd7ae7SLois Curfman McInnes b->colmap = 0; 165144cd7ae7SLois Curfman McInnes b->garray = 0; 165244cd7ae7SLois Curfman McInnes b->roworiented = 1; 16538a729477SBarry Smith 16541eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 165544cd7ae7SLois Curfman McInnes b->lvec = 0; 165644cd7ae7SLois Curfman McInnes b->Mvctx = 0; 16578a729477SBarry Smith 16587a0afa10SBarry Smith /* stuff for MatGetRow() */ 165944cd7ae7SLois Curfman McInnes b->rowindices = 0; 166044cd7ae7SLois Curfman McInnes b->rowvalues = 0; 166144cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 16627a0afa10SBarry Smith 166344cd7ae7SLois Curfman McInnes *A = B; 1664d6dfbf8fSBarry Smith return 0; 1665d6dfbf8fSBarry Smith } 1666c74985f6SBarry Smith 16675615d1e5SSatish Balay #undef __FUNC__ 16685615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIAIJ" 16698f6be9afSLois Curfman McInnes int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1670d6dfbf8fSBarry Smith { 1671d6dfbf8fSBarry Smith Mat mat; 1672416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1673a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1674d6dfbf8fSBarry Smith 1675416022c9SBarry Smith *newmat = 0; 1676f09e8eb9SSatish Balay PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1677a5a9c739SBarry Smith PLogObjectCreate(mat); 16780452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1679416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 168044a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 168144a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1682d6dfbf8fSBarry Smith mat->factor = matin->factor; 1683c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1684d6dfbf8fSBarry Smith 168544cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 168644cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 168744cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 168844cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1689d6dfbf8fSBarry Smith 1690416022c9SBarry Smith a->rstart = oldmat->rstart; 1691416022c9SBarry Smith a->rend = oldmat->rend; 1692416022c9SBarry Smith a->cstart = oldmat->cstart; 1693416022c9SBarry Smith a->cend = oldmat->cend; 169417699dbbSLois Curfman McInnes a->size = oldmat->size; 169517699dbbSLois Curfman McInnes a->rank = oldmat->rank; 169647794344SBarry Smith mat->insertmode = NOT_SET_VALUES; 1697bcd2baecSBarry Smith a->rowvalues = 0; 1698bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1699d6dfbf8fSBarry Smith 1700603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1701f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1702603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1703603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1704416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 17052ee70a88SLois Curfman McInnes if (oldmat->colmap) { 17060452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1707416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1708416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1709416022c9SBarry Smith } else a->colmap = 0; 17103f41c07dSBarry Smith if (oldmat->garray) { 17113f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 17123f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1713464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 17143f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1715416022c9SBarry Smith } else a->garray = 0; 1716d6dfbf8fSBarry Smith 1717416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1718416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1719a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1720416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1721416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1722416022c9SBarry Smith PLogObjectParent(mat,a->A); 1723416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1724416022c9SBarry Smith PLogObjectParent(mat,a->B); 17255dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 172625cdf11fSBarry Smith if (flg) { 1727682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1728682d7d0cSBarry Smith } 17298a729477SBarry Smith *newmat = mat; 17308a729477SBarry Smith return 0; 17318a729477SBarry Smith } 1732416022c9SBarry Smith 173377c4ece6SBarry Smith #include "sys.h" 1734416022c9SBarry Smith 17355615d1e5SSatish Balay #undef __FUNC__ 17365615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 173719bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1738416022c9SBarry Smith { 1739d65a2f8fSBarry Smith Mat A; 1740416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1741d65a2f8fSBarry Smith Scalar *vals,*svals; 174219bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1743416022c9SBarry Smith MPI_Status status; 174417699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1745d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 174619bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1747416022c9SBarry Smith 174817699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 174917699dbbSLois Curfman McInnes if (!rank) { 175090ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 175177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1752e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1753416022c9SBarry Smith } 1754416022c9SBarry Smith 1755416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1756416022c9SBarry Smith M = header[1]; N = header[2]; 1757416022c9SBarry Smith /* determine ownership of all rows */ 175817699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 17590452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1760416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1761416022c9SBarry Smith rowners[0] = 0; 176217699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1763416022c9SBarry Smith rowners[i] += rowners[i-1]; 1764416022c9SBarry Smith } 176517699dbbSLois Curfman McInnes rstart = rowners[rank]; 176617699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1767416022c9SBarry Smith 1768416022c9SBarry Smith /* distribute row lengths to all processors */ 17690452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1770416022c9SBarry Smith offlens = ourlens + (rend-rstart); 177117699dbbSLois Curfman McInnes if (!rank) { 17720452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 177377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 17740452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 177517699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1776d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 17770452661fSBarry Smith PetscFree(sndcounts); 1778416022c9SBarry Smith } 1779416022c9SBarry Smith else { 1780416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1781416022c9SBarry Smith } 1782416022c9SBarry Smith 178317699dbbSLois Curfman McInnes if (!rank) { 1784416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 17850452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1786cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 178717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1788416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1789416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1790416022c9SBarry Smith } 1791416022c9SBarry Smith } 17920452661fSBarry Smith PetscFree(rowlengths); 1793416022c9SBarry Smith 1794416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1795416022c9SBarry Smith maxnz = 0; 179617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 17970452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1798416022c9SBarry Smith } 17990452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1800416022c9SBarry Smith 1801416022c9SBarry Smith /* read in my part of the matrix column indices */ 1802416022c9SBarry Smith nz = procsnz[0]; 18030452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 180477c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1805d65a2f8fSBarry Smith 1806d65a2f8fSBarry Smith /* read in every one elses and ship off */ 180717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1808d65a2f8fSBarry Smith nz = procsnz[i]; 180977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1810d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1811d65a2f8fSBarry Smith } 18120452661fSBarry Smith PetscFree(cols); 1813416022c9SBarry Smith } 1814416022c9SBarry Smith else { 1815416022c9SBarry Smith /* determine buffer space needed for message */ 1816416022c9SBarry Smith nz = 0; 1817416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1818416022c9SBarry Smith nz += ourlens[i]; 1819416022c9SBarry Smith } 18200452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1821416022c9SBarry Smith 1822416022c9SBarry Smith /* receive message of column indices*/ 1823d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1824416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1825e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1826416022c9SBarry Smith } 1827416022c9SBarry Smith 1828416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1829cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1830416022c9SBarry Smith jj = 0; 1831416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1832416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1833d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1834416022c9SBarry Smith jj++; 1835416022c9SBarry Smith } 1836416022c9SBarry Smith } 1837d65a2f8fSBarry Smith 1838d65a2f8fSBarry Smith /* create our matrix */ 1839416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1840416022c9SBarry Smith ourlens[i] -= offlens[i]; 1841416022c9SBarry Smith } 1842d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1843d65a2f8fSBarry Smith A = *newmat; 18446d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1845d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1846d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1847d65a2f8fSBarry Smith } 1848416022c9SBarry Smith 184917699dbbSLois Curfman McInnes if (!rank) { 18500452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1851416022c9SBarry Smith 1852416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1853416022c9SBarry Smith nz = procsnz[0]; 185477c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1855d65a2f8fSBarry Smith 1856d65a2f8fSBarry Smith /* insert into matrix */ 1857d65a2f8fSBarry Smith jj = rstart; 1858d65a2f8fSBarry Smith smycols = mycols; 1859d65a2f8fSBarry Smith svals = vals; 1860d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1861d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1862d65a2f8fSBarry Smith smycols += ourlens[i]; 1863d65a2f8fSBarry Smith svals += ourlens[i]; 1864d65a2f8fSBarry Smith jj++; 1865416022c9SBarry Smith } 1866416022c9SBarry Smith 1867d65a2f8fSBarry Smith /* read in other processors and ship out */ 186817699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1869416022c9SBarry Smith nz = procsnz[i]; 187077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1871d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1872416022c9SBarry Smith } 18730452661fSBarry Smith PetscFree(procsnz); 1874416022c9SBarry Smith } 1875d65a2f8fSBarry Smith else { 1876d65a2f8fSBarry Smith /* receive numeric values */ 18770452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1878416022c9SBarry Smith 1879d65a2f8fSBarry Smith /* receive message of values*/ 1880d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1881d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1882e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1883d65a2f8fSBarry Smith 1884d65a2f8fSBarry Smith /* insert into matrix */ 1885d65a2f8fSBarry Smith jj = rstart; 1886d65a2f8fSBarry Smith smycols = mycols; 1887d65a2f8fSBarry Smith svals = vals; 1888d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1889d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1890d65a2f8fSBarry Smith smycols += ourlens[i]; 1891d65a2f8fSBarry Smith svals += ourlens[i]; 1892d65a2f8fSBarry Smith jj++; 1893d65a2f8fSBarry Smith } 1894d65a2f8fSBarry Smith } 18950452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1896d65a2f8fSBarry Smith 18976d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 18986d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1899416022c9SBarry Smith return 0; 1900416022c9SBarry Smith } 1901