1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*6831982aSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.304 1999/10/13 20:37:20 bsmith Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 7d9942c19SSatish Balay #include "src/inline/spops.h" 88a729477SBarry Smith 9bc5ccf88SSatish Balay extern int MatSetUpMultiply_MPIAIJ(Mat); 10bc5ccf88SSatish Balay extern int DisAssemble_MPIAIJ(Mat); 11bc5ccf88SSatish Balay extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12bc5ccf88SSatish Balay extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13bc5ccf88SSatish Balay extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14bc5ccf88SSatish Balay extern int MatPrintHelp_SeqAIJ(Mat); 15bc5ccf88SSatish Balay 160f5bd95cSBarry Smith /* 170f5bd95cSBarry Smith Local utility routine that creates a mapping from the global column 189e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 190f5bd95cSBarry Smith storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 200f5bd95cSBarry Smith a slightly higher hash table cost; without it it is not scalable (each processor 210f5bd95cSBarry Smith has an order N integer array but is fast to acess. 229e25ed09SBarry Smith */ 235615d1e5SSatish Balay #undef __FUNC__ 24d4bb536fSBarry Smith #define __FUNC__ "CreateColmap_MPIAIJ_Private" 250a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 269e25ed09SBarry Smith { 2744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 28ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 29dc2900e9SSatish Balay int n = B->n,i,ierr; 30dbb450caSBarry Smith 313a40ed3dSBarry Smith PetscFunctionBegin; 32aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 330f5bd95cSBarry Smith ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 34b1fc9764SSatish Balay for ( i=0; i<n; i++ ){ 350f5bd95cSBarry Smith ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 36b1fc9764SSatish Balay } 37b1fc9764SSatish Balay #else 38758f045eSSatish Balay aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 39464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 40549d3d68SSatish Balay ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 41905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 42b1fc9764SSatish Balay #endif 433a40ed3dSBarry Smith PetscFunctionReturn(0); 449e25ed09SBarry Smith } 459e25ed09SBarry Smith 460520107fSSatish Balay #define CHUNKSIZE 15 4730770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 480520107fSSatish Balay { \ 490520107fSSatish Balay \ 500520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 5130770e4dSSatish Balay rmax = aimax[row]; nrow = ailen[row]; \ 52f5e9677aSSatish Balay col1 = col - shift; \ 53f5e9677aSSatish Balay \ 54ba4e3ef2SSatish Balay low = 0; high = nrow; \ 55ba4e3ef2SSatish Balay while (high-low > 5) { \ 56ba4e3ef2SSatish Balay t = (low+high)/2; \ 57ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 58ba4e3ef2SSatish Balay else low = t; \ 59ba4e3ef2SSatish Balay } \ 600520107fSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 61f5e9677aSSatish Balay if (rp[_i] > col1) break; \ 62f5e9677aSSatish Balay if (rp[_i] == col1) { \ 630520107fSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 640520107fSSatish Balay else ap[_i] = value; \ 6530770e4dSSatish Balay goto a_noinsert; \ 660520107fSSatish Balay } \ 670520107fSSatish Balay } \ 6889280ab3SLois Curfman McInnes if (nonew == 1) goto a_noinsert; \ 69a8c6a408SBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 700520107fSSatish Balay if (nrow >= rmax) { \ 710520107fSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 720520107fSSatish Balay int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 730520107fSSatish Balay Scalar *new_a; \ 740520107fSSatish Balay \ 75a8c6a408SBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 7689280ab3SLois Curfman McInnes \ 770520107fSSatish Balay /* malloc new storage space */ \ 780520107fSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 790520107fSSatish Balay new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 800520107fSSatish Balay new_j = (int *) (new_a + new_nz); \ 810520107fSSatish Balay new_i = new_j + new_nz; \ 820520107fSSatish Balay \ 830520107fSSatish Balay /* copy over old data into new slots */ \ 840520107fSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 850520107fSSatish Balay for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 86549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 870520107fSSatish Balay len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 88549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 89549d3d68SSatish Balay len*sizeof(int));CHKERRQ(ierr); \ 90549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 91549d3d68SSatish Balay ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 92549d3d68SSatish Balay len*sizeof(Scalar));CHKERRQ(ierr); \ 930520107fSSatish Balay /* free up old matrix storage */ \ 94f5e9677aSSatish Balay \ 95606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 96606d414cSSatish Balay if (!a->singlemalloc) { \ 97606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 98606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); \ 99606d414cSSatish Balay } \ 1000520107fSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1010520107fSSatish Balay a->singlemalloc = 1; \ 1020520107fSSatish Balay \ 1030520107fSSatish Balay rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 10430770e4dSSatish Balay rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 1050520107fSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 1060520107fSSatish Balay a->maxnz += CHUNKSIZE; \ 1070520107fSSatish Balay a->reallocs++; \ 1080520107fSSatish Balay } \ 1090520107fSSatish Balay N = nrow++ - 1; a->nz++; \ 1100520107fSSatish Balay /* shift up all the later entries in this row */ \ 1110520107fSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 1120520107fSSatish Balay rp[ii+1] = rp[ii]; \ 1130520107fSSatish Balay ap[ii+1] = ap[ii]; \ 1140520107fSSatish Balay } \ 115f5e9677aSSatish Balay rp[_i] = col1; \ 1160520107fSSatish Balay ap[_i] = value; \ 11730770e4dSSatish Balay a_noinsert: ; \ 1180520107fSSatish Balay ailen[row] = nrow; \ 1190520107fSSatish Balay } 1200a198c4cSBarry Smith 12130770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 12230770e4dSSatish Balay { \ 12330770e4dSSatish Balay \ 12430770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 12530770e4dSSatish Balay rmax = bimax[row]; nrow = bilen[row]; \ 12630770e4dSSatish Balay col1 = col - shift; \ 12730770e4dSSatish Balay \ 128ba4e3ef2SSatish Balay low = 0; high = nrow; \ 129ba4e3ef2SSatish Balay while (high-low > 5) { \ 130ba4e3ef2SSatish Balay t = (low+high)/2; \ 131ba4e3ef2SSatish Balay if (rp[t] > col) high = t; \ 132ba4e3ef2SSatish Balay else low = t; \ 133ba4e3ef2SSatish Balay } \ 13430770e4dSSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 13530770e4dSSatish Balay if (rp[_i] > col1) break; \ 13630770e4dSSatish Balay if (rp[_i] == col1) { \ 13730770e4dSSatish Balay if (addv == ADD_VALUES) ap[_i] += value; \ 13830770e4dSSatish Balay else ap[_i] = value; \ 13930770e4dSSatish Balay goto b_noinsert; \ 14030770e4dSSatish Balay } \ 14130770e4dSSatish Balay } \ 14289280ab3SLois Curfman McInnes if (nonew == 1) goto b_noinsert; \ 143a8c6a408SBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 14430770e4dSSatish Balay if (nrow >= rmax) { \ 14530770e4dSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 14674c639caSSatish Balay int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 14730770e4dSSatish Balay Scalar *new_a; \ 14830770e4dSSatish Balay \ 149a8c6a408SBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 15089280ab3SLois Curfman McInnes \ 15130770e4dSSatish Balay /* malloc new storage space */ \ 15274c639caSSatish Balay len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 15330770e4dSSatish Balay new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 15430770e4dSSatish Balay new_j = (int *) (new_a + new_nz); \ 15530770e4dSSatish Balay new_i = new_j + new_nz; \ 15630770e4dSSatish Balay \ 15730770e4dSSatish Balay /* copy over old data into new slots */ \ 15830770e4dSSatish Balay for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 15974c639caSSatish Balay for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 160549d3d68SSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 16130770e4dSSatish Balay len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 162549d3d68SSatish Balay ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 163549d3d68SSatish Balay len*sizeof(int));CHKERRQ(ierr); \ 164549d3d68SSatish Balay ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 165549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 166549d3d68SSatish Balay len*sizeof(Scalar));CHKERRQ(ierr); \ 16730770e4dSSatish Balay /* free up old matrix storage */ \ 16830770e4dSSatish Balay \ 169606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 170606d414cSSatish Balay if (!b->singlemalloc) { \ 171606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 172606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 173606d414cSSatish Balay } \ 17474c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 17574c639caSSatish Balay b->singlemalloc = 1; \ 17630770e4dSSatish Balay \ 17730770e4dSSatish Balay rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 17830770e4dSSatish Balay rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 17974c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 18074c639caSSatish Balay b->maxnz += CHUNKSIZE; \ 18174c639caSSatish Balay b->reallocs++; \ 18230770e4dSSatish Balay } \ 18374c639caSSatish Balay N = nrow++ - 1; b->nz++; \ 18430770e4dSSatish Balay /* shift up all the later entries in this row */ \ 18530770e4dSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 18630770e4dSSatish Balay rp[ii+1] = rp[ii]; \ 18730770e4dSSatish Balay ap[ii+1] = ap[ii]; \ 18830770e4dSSatish Balay } \ 18930770e4dSSatish Balay rp[_i] = col1; \ 19030770e4dSSatish Balay ap[_i] = value; \ 19130770e4dSSatish Balay b_noinsert: ; \ 19230770e4dSSatish Balay bilen[row] = nrow; \ 19330770e4dSSatish Balay } 19430770e4dSSatish Balay 1955615d1e5SSatish Balay #undef __FUNC__ 1965615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIAIJ" 1978f6be9afSLois Curfman McInnes int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 1988a729477SBarry Smith { 19944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2004b0e389bSBarry Smith Scalar value; 2011eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 2021eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 203905e6a2fSBarry Smith int roworiented = aij->roworiented; 2048a729477SBarry Smith 2050520107fSSatish Balay /* Some Variables required in the macro */ 2064ee7247eSSatish Balay Mat A = aij->A; 2074ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 20830770e4dSSatish Balay int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 20930770e4dSSatish Balay Scalar *aa = a->a; 21030770e4dSSatish Balay 21130770e4dSSatish Balay Mat B = aij->B; 21230770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 21330770e4dSSatish Balay int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 21430770e4dSSatish Balay Scalar *ba = b->a; 21530770e4dSSatish Balay 216ba4e3ef2SSatish Balay int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 21730770e4dSSatish Balay int nonew = a->nonew,shift = a->indexshift; 21830770e4dSSatish Balay Scalar *ap; 2194ee7247eSSatish Balay 2203a40ed3dSBarry Smith PetscFunctionBegin; 2218a729477SBarry Smith for ( i=0; i<m; i++ ) { 2225ef9f2a5SBarry Smith if (im[i] < 0) continue; 223aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 224a8c6a408SBarry Smith if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 2250a198c4cSBarry Smith #endif 2264b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 2274b0e389bSBarry Smith row = im[i] - rstart; 2281eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 2294b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 2304b0e389bSBarry Smith col = in[j] - cstart; 2314b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 23230770e4dSSatish Balay MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 2330520107fSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2341eb62cbbSBarry Smith } 2355ef9f2a5SBarry Smith else if (in[j] < 0) continue; 236aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 237a8c6a408SBarry Smith else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 2380a198c4cSBarry Smith #endif 2391eb62cbbSBarry Smith else { 240227d817aSBarry Smith if (mat->was_assembled) { 241905e6a2fSBarry Smith if (!aij->colmap) { 242905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 243905e6a2fSBarry Smith } 244aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 2450f5bd95cSBarry Smith ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 246fa46199cSSatish Balay col--; 247b1fc9764SSatish Balay #else 248905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 249b1fc9764SSatish Balay #endif 250ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2512493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 2524b0e389bSBarry Smith col = in[j]; 2539bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 254f9508a3cSSatish Balay B = aij->B; 255f9508a3cSSatish Balay b = (Mat_SeqAIJ *) B->data; 256f9508a3cSSatish Balay bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 257f9508a3cSSatish Balay ba = b->a; 258d6dfbf8fSBarry Smith } 259c48de900SBarry Smith } else col = in[j]; 2604b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 26130770e4dSSatish Balay MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 26230770e4dSSatish Balay /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 2631eb62cbbSBarry Smith } 2641eb62cbbSBarry Smith } 2655ef9f2a5SBarry Smith } else { 26690f02eecSBarry Smith if (!aij->donotstash) { 267d36fbae8SSatish Balay if (roworiented) { 2688798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 269d36fbae8SSatish Balay } else { 2708798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 2714b0e389bSBarry Smith } 2721eb62cbbSBarry Smith } 2738a729477SBarry Smith } 27490f02eecSBarry Smith } 2753a40ed3dSBarry Smith PetscFunctionReturn(0); 2768a729477SBarry Smith } 2778a729477SBarry Smith 2785615d1e5SSatish Balay #undef __FUNC__ 2795615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIAIJ" 2808f6be9afSLois Curfman McInnes int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 281b49de8d1SLois Curfman McInnes { 282b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 283b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 284b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 285b49de8d1SLois Curfman McInnes 2863a40ed3dSBarry Smith PetscFunctionBegin; 287b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 288a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 289a8c6a408SBarry Smith if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 290b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 291b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 292b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 293a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 294a8c6a408SBarry Smith if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 295b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 296b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 297b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298fa852ad4SSatish Balay } else { 299905e6a2fSBarry Smith if (!aij->colmap) { 300905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301905e6a2fSBarry Smith } 302aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3030f5bd95cSBarry Smith ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304fa46199cSSatish Balay col --; 305b1fc9764SSatish Balay #else 306905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 307b1fc9764SSatish Balay #endif 308e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309d9d09a02SSatish Balay else { 310b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311b49de8d1SLois Curfman McInnes } 312b49de8d1SLois Curfman McInnes } 313b49de8d1SLois Curfman McInnes } 314a8c6a408SBarry Smith } else { 315a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 316b49de8d1SLois Curfman McInnes } 317b49de8d1SLois Curfman McInnes } 3183a40ed3dSBarry Smith PetscFunctionReturn(0); 319b49de8d1SLois Curfman McInnes } 320bc5ccf88SSatish Balay 321bc5ccf88SSatish Balay #undef __FUNC__ 322bc5ccf88SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 323bc5ccf88SSatish Balay int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324bc5ccf88SSatish Balay { 325bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 326d36fbae8SSatish Balay int ierr,nstash,reallocs; 327bc5ccf88SSatish Balay InsertMode addv; 328bc5ccf88SSatish Balay 329bc5ccf88SSatish Balay PetscFunctionBegin; 330bc5ccf88SSatish Balay if (aij->donotstash) { 331bc5ccf88SSatish Balay PetscFunctionReturn(0); 332bc5ccf88SSatish Balay } 333bc5ccf88SSatish Balay 334bc5ccf88SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 335bc5ccf88SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336bc5ccf88SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 337bc5ccf88SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 338bc5ccf88SSatish Balay } 339bc5ccf88SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 340bc5ccf88SSatish Balay 3418798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 3428798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 3435a655dc6SBarry Smith PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344bc5ccf88SSatish Balay PetscFunctionReturn(0); 345bc5ccf88SSatish Balay } 346bc5ccf88SSatish Balay 347bc5ccf88SSatish Balay 348bc5ccf88SSatish Balay #undef __FUNC__ 349bc5ccf88SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 350bc5ccf88SSatish Balay int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351bc5ccf88SSatish Balay { 352bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 353a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg; 354bc5ccf88SSatish Balay int *row,*col,other_disassembled; 355bc5ccf88SSatish Balay Scalar *val; 356bc5ccf88SSatish Balay InsertMode addv = mat->insertmode; 357bc5ccf88SSatish Balay 358bc5ccf88SSatish Balay PetscFunctionBegin; 359bc5ccf88SSatish Balay if (!aij->donotstash) { 360a2d1c673SSatish Balay while (1) { 3618798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 362a2d1c673SSatish Balay if (!flg) break; 363a2d1c673SSatish Balay 364bc5ccf88SSatish Balay for ( i=0; i<n; ) { 365bc5ccf88SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 366bc5ccf88SSatish Balay for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 367bc5ccf88SSatish Balay if (j < n) ncols = j-i; 368bc5ccf88SSatish Balay else ncols = n-i; 369bc5ccf88SSatish Balay /* Now assemble all these values with a single function call */ 370bc5ccf88SSatish Balay ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 371bc5ccf88SSatish Balay i = j; 372bc5ccf88SSatish Balay } 373bc5ccf88SSatish Balay } 3748798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 375bc5ccf88SSatish Balay } 376bc5ccf88SSatish Balay 377bc5ccf88SSatish Balay ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 378bc5ccf88SSatish Balay ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 379bc5ccf88SSatish Balay 380bc5ccf88SSatish Balay /* determine if any processor has disassembled, if so we must 381bc5ccf88SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 382bc5ccf88SSatish Balay /* 383bc5ccf88SSatish Balay if nonzero structure of submatrix B cannot change then we know that 384bc5ccf88SSatish Balay no processor disassembled thus we can skip this stuff 385bc5ccf88SSatish Balay */ 386bc5ccf88SSatish Balay if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 387bc5ccf88SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 388bc5ccf88SSatish Balay if (mat->was_assembled && !other_disassembled) { 389bc5ccf88SSatish Balay ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 390bc5ccf88SSatish Balay } 391bc5ccf88SSatish Balay } 392bc5ccf88SSatish Balay 393bc5ccf88SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394bc5ccf88SSatish Balay ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395bc5ccf88SSatish Balay } 396bc5ccf88SSatish Balay ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397bc5ccf88SSatish Balay ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398bc5ccf88SSatish Balay 399606d414cSSatish Balay if (aij->rowvalues) { 400606d414cSSatish Balay ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401606d414cSSatish Balay aij->rowvalues = 0; 402606d414cSSatish Balay } 403bc5ccf88SSatish Balay PetscFunctionReturn(0); 404bc5ccf88SSatish Balay } 405bc5ccf88SSatish Balay 4065615d1e5SSatish Balay #undef __FUNC__ 4075615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIAIJ" 4088f6be9afSLois Curfman McInnes int MatZeroEntries_MPIAIJ(Mat A) 4091eb62cbbSBarry Smith { 41044a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 411dbd7a890SLois Curfman McInnes int ierr; 4123a40ed3dSBarry Smith 4133a40ed3dSBarry Smith PetscFunctionBegin; 41478b31e54SBarry Smith ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 41578b31e54SBarry Smith ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 4163a40ed3dSBarry Smith PetscFunctionReturn(0); 4171eb62cbbSBarry Smith } 4181eb62cbbSBarry Smith 4195615d1e5SSatish Balay #undef __FUNC__ 4205615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIAIJ" 4218f6be9afSLois Curfman McInnes int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 4221eb62cbbSBarry Smith { 42344a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 42417699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 4256a5c57faSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 42617699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 4275392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 4286a5c57faSSatish Balay int *lens,imdex,*lrows,*values,rstart=l->rstart; 429d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 4301eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 4311eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 4321eb62cbbSBarry Smith IS istmp; 4331eb62cbbSBarry Smith 4343a40ed3dSBarry Smith PetscFunctionBegin; 43577c4ece6SBarry Smith ierr = ISGetSize(is,&N);CHKERRQ(ierr); 43678b31e54SBarry Smith ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 4371eb62cbbSBarry Smith 4381eb62cbbSBarry Smith /* first count number of contributors to each processor */ 4390452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 440549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 441549d3d68SSatish Balay procs = nprocs + size; 4420452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 4431eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4441eb62cbbSBarry Smith idx = rows[i]; 4451eb62cbbSBarry Smith found = 0; 44617699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 4471eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 4481eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 4491eb62cbbSBarry Smith } 4501eb62cbbSBarry Smith } 451a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 4521eb62cbbSBarry Smith } 45317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 4541eb62cbbSBarry Smith 4551eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 456*6831982aSBarry Smith work = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work); 457*6831982aSBarry Smith ierr = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 458*6831982aSBarry Smith nrecvs = work[size+rank]; 45917699dbbSLois Curfman McInnes nmax = work[rank]; 460606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 4611eb62cbbSBarry Smith 4621eb62cbbSBarry Smith /* post receives: */ 4633a40ed3dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 464ca161407SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 4651eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 466ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 4671eb62cbbSBarry Smith } 4681eb62cbbSBarry Smith 4691eb62cbbSBarry Smith /* do sends: 4701eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 4711eb62cbbSBarry Smith the ith processor 4721eb62cbbSBarry Smith */ 4730452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 4743a40ed3dSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 4750452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 4761eb62cbbSBarry Smith starts[0] = 0; 47717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4781eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4791eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 4801eb62cbbSBarry Smith } 481*6831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 4821eb62cbbSBarry Smith 4831eb62cbbSBarry Smith starts[0] = 0; 48417699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4851eb62cbbSBarry Smith count = 0; 48617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 4871eb62cbbSBarry Smith if (procs[i]) { 488ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 4891eb62cbbSBarry Smith } 4901eb62cbbSBarry Smith } 491606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 4921eb62cbbSBarry Smith 49317699dbbSLois Curfman McInnes base = owners[rank]; 4941eb62cbbSBarry Smith 4951eb62cbbSBarry Smith /* wait on receives */ 4960452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 4971eb62cbbSBarry Smith source = lens + nrecvs; 4981eb62cbbSBarry Smith count = nrecvs; slen = 0; 4991eb62cbbSBarry Smith while (count) { 500ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 5011eb62cbbSBarry Smith /* unpack receives into our local space */ 502ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 503d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 504d6dfbf8fSBarry Smith lens[imdex] = n; 5051eb62cbbSBarry Smith slen += n; 5061eb62cbbSBarry Smith count--; 5071eb62cbbSBarry Smith } 508606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 5091eb62cbbSBarry Smith 5101eb62cbbSBarry Smith /* move the data into the send scatter */ 5110452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 5121eb62cbbSBarry Smith count = 0; 5131eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 5141eb62cbbSBarry Smith values = rvalues + i*nmax; 5151eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 5161eb62cbbSBarry Smith lrows[count++] = values[j] - base; 5171eb62cbbSBarry Smith } 5181eb62cbbSBarry Smith } 519606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 520606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 521606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 522606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 5231eb62cbbSBarry Smith 5241eb62cbbSBarry Smith /* actually zap the local rows */ 525029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 526464493b3SBarry Smith PLogObjectParent(A,istmp); 5276a5c57faSSatish Balay 5286eb55b6aSBarry Smith /* 5296eb55b6aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 5306eb55b6aSBarry Smith is square and the user wishes to set the diagonal we use seperate 5316eb55b6aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 5326eb55b6aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 5336eb55b6aSBarry Smith 5346eb55b6aSBarry Smith Contributed by: Mathew Knepley 5356eb55b6aSBarry Smith */ 536e2d53e46SBarry Smith /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 537e2d53e46SBarry Smith ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 5386eb55b6aSBarry Smith if (diag && (l->A->M == l->A->N)) { 5396eb55b6aSBarry Smith ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 540e2d53e46SBarry Smith } else if (diag) { 541e2d53e46SBarry Smith ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 542fa46199cSSatish Balay if (((Mat_SeqAIJ*)l->A->data)->nonew) { 543fa46199cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 544fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 5456525c446SSatish Balay } 546e2d53e46SBarry Smith for ( i = 0; i < slen; i++ ) { 547e2d53e46SBarry Smith row = lrows[i] + rstart; 548e2d53e46SBarry Smith ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 549e2d53e46SBarry Smith } 550e2d53e46SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551e2d53e46SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5526eb55b6aSBarry Smith } else { 5536a5c57faSSatish Balay ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 5546eb55b6aSBarry Smith } 55578b31e54SBarry Smith ierr = ISDestroy(istmp);CHKERRQ(ierr); 556606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 55772dacd9aSBarry Smith 5581eb62cbbSBarry Smith /* wait on sends */ 5591eb62cbbSBarry Smith if (nsends) { 560ca161407SBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 561ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 562606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 5631eb62cbbSBarry Smith } 564606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 565606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 5661eb62cbbSBarry Smith 5673a40ed3dSBarry Smith PetscFunctionReturn(0); 5681eb62cbbSBarry Smith } 5691eb62cbbSBarry Smith 5705615d1e5SSatish Balay #undef __FUNC__ 5715615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIAIJ" 5728f6be9afSLois Curfman McInnes int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 5731eb62cbbSBarry Smith { 574416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 575fbd6ef76SBarry Smith int ierr,nt; 576416022c9SBarry Smith 5773a40ed3dSBarry Smith PetscFunctionBegin; 578a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 579fbd6ef76SBarry Smith if (nt != a->n) { 580a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 581fbd6ef76SBarry Smith } 58243a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 58443a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 5863a40ed3dSBarry Smith PetscFunctionReturn(0); 5871eb62cbbSBarry Smith } 5881eb62cbbSBarry Smith 5895615d1e5SSatish Balay #undef __FUNC__ 5905615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIAIJ" 5918f6be9afSLois Curfman McInnes int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 592da3a660dSBarry Smith { 593416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 594da3a660dSBarry Smith int ierr; 5953a40ed3dSBarry Smith 5963a40ed3dSBarry Smith PetscFunctionBegin; 59743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 59943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 6013a40ed3dSBarry Smith PetscFunctionReturn(0); 602da3a660dSBarry Smith } 603da3a660dSBarry Smith 6045615d1e5SSatish Balay #undef __FUNC__ 6055615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIAIJ" 6068f6be9afSLois Curfman McInnes int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 607da3a660dSBarry Smith { 608416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 609da3a660dSBarry Smith int ierr; 610da3a660dSBarry Smith 6113a40ed3dSBarry Smith PetscFunctionBegin; 612da3a660dSBarry Smith /* do nondiagonal part */ 613f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 614da3a660dSBarry Smith /* send it on its way */ 615537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616da3a660dSBarry Smith /* do local part */ 617f830108cSBarry Smith ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 618da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 619da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 620da3a660dSBarry Smith /* but is not perhaps always true. */ 621537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 6223a40ed3dSBarry Smith PetscFunctionReturn(0); 623da3a660dSBarry Smith } 624da3a660dSBarry Smith 6255615d1e5SSatish Balay #undef __FUNC__ 6265615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIAIJ" 6278f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 628da3a660dSBarry Smith { 629416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 630da3a660dSBarry Smith int ierr; 631da3a660dSBarry Smith 6323a40ed3dSBarry Smith PetscFunctionBegin; 633da3a660dSBarry Smith /* do nondiagonal part */ 634f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 635da3a660dSBarry Smith /* send it on its way */ 636537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637da3a660dSBarry Smith /* do local part */ 638f830108cSBarry Smith ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 639da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 640da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 641da3a660dSBarry Smith /* but is not perhaps always true. */ 6420a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 6433a40ed3dSBarry Smith PetscFunctionReturn(0); 644da3a660dSBarry Smith } 645da3a660dSBarry Smith 6461eb62cbbSBarry Smith /* 6471eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 6481eb62cbbSBarry Smith diagonal block 6491eb62cbbSBarry Smith */ 6505615d1e5SSatish Balay #undef __FUNC__ 6515615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIAIJ" 6528f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 6531eb62cbbSBarry Smith { 6543a40ed3dSBarry Smith int ierr; 655416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 6563a40ed3dSBarry Smith 6573a40ed3dSBarry Smith PetscFunctionBegin; 658a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 6595baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 660a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 6613a40ed3dSBarry Smith } 6623a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 6633a40ed3dSBarry Smith PetscFunctionReturn(0); 6641eb62cbbSBarry Smith } 6651eb62cbbSBarry Smith 6665615d1e5SSatish Balay #undef __FUNC__ 6675615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIAIJ" 6688f6be9afSLois Curfman McInnes int MatScale_MPIAIJ(Scalar *aa,Mat A) 669052efed2SBarry Smith { 670052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 671052efed2SBarry Smith int ierr; 6723a40ed3dSBarry Smith 6733a40ed3dSBarry Smith PetscFunctionBegin; 674052efed2SBarry Smith ierr = MatScale(aa,a->A);CHKERRQ(ierr); 675052efed2SBarry Smith ierr = MatScale(aa,a->B);CHKERRQ(ierr); 6763a40ed3dSBarry Smith PetscFunctionReturn(0); 677052efed2SBarry Smith } 678052efed2SBarry Smith 6795615d1e5SSatish Balay #undef __FUNC__ 680d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_MPIAIJ" 681e1311b90SBarry Smith int MatDestroy_MPIAIJ(Mat mat) 6821eb62cbbSBarry Smith { 68344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 6841eb62cbbSBarry Smith int ierr; 68583e2fdc7SBarry Smith 6863a40ed3dSBarry Smith PetscFunctionBegin; 68770429bc8SBarry Smith 68870429bc8SBarry Smith if (mat->mapping) { 68970429bc8SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 69070429bc8SBarry Smith } 69170429bc8SBarry Smith if (mat->bmapping) { 69270429bc8SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 69370429bc8SBarry Smith } 69461b13de0SBarry Smith if (mat->rmap) { 69561b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 69661b13de0SBarry Smith } 69761b13de0SBarry Smith if (mat->cmap) { 69861b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 69961b13de0SBarry Smith } 700aa482453SBarry Smith #if defined(PETSC_USE_LOG) 701e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 702a5a9c739SBarry Smith #endif 7038798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 704606d414cSSatish Balay ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 70578b31e54SBarry Smith ierr = MatDestroy(aij->A);CHKERRQ(ierr); 70678b31e54SBarry Smith ierr = MatDestroy(aij->B);CHKERRQ(ierr); 707aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7080f5bd95cSBarry Smith if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 709b1fc9764SSatish Balay #else 710606d414cSSatish Balay if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 711b1fc9764SSatish Balay #endif 712606d414cSSatish Balay if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 7131eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 714a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 715606d414cSSatish Balay if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 716606d414cSSatish Balay ierr = PetscFree(aij);CHKERRQ(ierr); 717a5a9c739SBarry Smith PLogObjectDestroy(mat); 7180452661fSBarry Smith PetscHeaderDestroy(mat); 7193a40ed3dSBarry Smith PetscFunctionReturn(0); 7201eb62cbbSBarry Smith } 721ee50ffe9SBarry Smith 7225615d1e5SSatish Balay #undef __FUNC__ 723d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ_Binary" 724bc5ccf88SSatish Balay int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 7251eb62cbbSBarry Smith { 726416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 727416022c9SBarry Smith int ierr; 728416022c9SBarry Smith 7293a40ed3dSBarry Smith PetscFunctionBegin; 73017699dbbSLois Curfman McInnes if (aij->size == 1) { 731416022c9SBarry Smith ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 732416022c9SBarry Smith } 733a8c6a408SBarry Smith else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 7343a40ed3dSBarry Smith PetscFunctionReturn(0); 735416022c9SBarry Smith } 736416022c9SBarry Smith 7375615d1e5SSatish Balay #undef __FUNC__ 7387b2a1423SBarry Smith #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 739bc5ccf88SSatish Balay int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 740416022c9SBarry Smith { 74144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 742dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 743*6831982aSBarry Smith int ierr, format,shift = C->indexshift,rank = aij->rank,size = aij->size; 744*6831982aSBarry Smith PetscTruth isdraw,isascii; 745416022c9SBarry Smith 7463a40ed3dSBarry Smith PetscFunctionBegin; 747*6831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 748*6831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 7490f5bd95cSBarry Smith if (isascii) { 750d8467735SBarry Smith ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 7510a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7524e220ebcSLois Curfman McInnes MatInfo info; 7534e220ebcSLois Curfman McInnes int flg; 7541dab6e02SBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 75695e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 757*6831982aSBarry Smith if (flg) { 758*6831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 759*6831982aSBarry Smith rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 760*6831982aSBarry Smith } else { 761*6831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 762*6831982aSBarry Smith rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 763*6831982aSBarry Smith } 764888f2ed8SSatish Balay ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 765*6831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 766888f2ed8SSatish Balay ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 767*6831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 768*6831982aSBarry Smith ierr = ViewerFlush(viewer);CHKERRQ(ierr); 769a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 7703a40ed3dSBarry Smith PetscFunctionReturn(0); 7713a40ed3dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 7723a40ed3dSBarry Smith PetscFunctionReturn(0); 77308480c60SBarry Smith } 7740f5bd95cSBarry Smith } else if (isdraw) { 77519bcc07fSBarry Smith Draw draw; 77619bcc07fSBarry Smith PetscTruth isnull; 77777ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 7783a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 77919bcc07fSBarry Smith } 78019bcc07fSBarry Smith 78117699dbbSLois Curfman McInnes if (size == 1) { 78278b31e54SBarry Smith ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 7833a40ed3dSBarry Smith } else { 78495373324SBarry Smith /* assemble the entire matrix onto first processor. */ 78595373324SBarry Smith Mat A; 786ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 7872eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 78895373324SBarry Smith Scalar *a; 7892ee70a88SLois Curfman McInnes 79017699dbbSLois Curfman McInnes if (!rank) { 79155843e3eSBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 7923a40ed3dSBarry Smith } else { 79355843e3eSBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 79495373324SBarry Smith } 795464493b3SBarry Smith PLogObjectParent(mat,A); 796416022c9SBarry Smith 79795373324SBarry Smith /* copy over the A part */ 798ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 7992ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 80095373324SBarry Smith row = aij->rstart; 801dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 80295373324SBarry Smith for ( i=0; i<m; i++ ) { 803416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 80495373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 80595373324SBarry Smith } 8062ee70a88SLois Curfman McInnes aj = Aloc->j; 807dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 80895373324SBarry Smith 80995373324SBarry Smith /* copy over the B part */ 810ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 8112ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 81295373324SBarry Smith row = aij->rstart; 8130452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols); 814dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 81595373324SBarry Smith for ( i=0; i<m; i++ ) { 816416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 81795373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 81895373324SBarry Smith } 819606d414cSSatish Balay ierr = PetscFree(ct);CHKERRQ(ierr); 8206d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8216d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 82255843e3eSBarry Smith /* 82355843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 82455843e3eSBarry Smith synchronized across all processors that share the Draw object 82555843e3eSBarry Smith */ 826*6831982aSBarry Smith if (!rank) { 827*6831982aSBarry Smith Viewer sviewer; 828*6831982aSBarry Smith ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 829*6831982aSBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 830*6831982aSBarry Smith ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 83195373324SBarry Smith } 83278b31e54SBarry Smith ierr = MatDestroy(A);CHKERRQ(ierr); 83395373324SBarry Smith } 8343a40ed3dSBarry Smith PetscFunctionReturn(0); 8351eb62cbbSBarry Smith } 8361eb62cbbSBarry Smith 8375615d1e5SSatish Balay #undef __FUNC__ 838d4bb536fSBarry Smith #define __FUNC__ "MatView_MPIAIJ" 839e1311b90SBarry Smith int MatView_MPIAIJ(Mat mat,Viewer viewer) 840416022c9SBarry Smith { 841416022c9SBarry Smith int ierr; 842*6831982aSBarry Smith PetscTruth isascii,isdraw,issocket,isbinary; 843416022c9SBarry Smith 8443a40ed3dSBarry Smith PetscFunctionBegin; 845*6831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 846*6831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 847*6831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 848*6831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 8490f5bd95cSBarry Smith if (isascii || isdraw || isbinary || issocket) { 8507b2a1423SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 8515cd90555SBarry Smith } else { 8520f5bd95cSBarry Smith SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 853416022c9SBarry Smith } 8543a40ed3dSBarry Smith PetscFunctionReturn(0); 855416022c9SBarry Smith } 856416022c9SBarry Smith 8571eb62cbbSBarry Smith /* 8581eb62cbbSBarry Smith This has to provide several versions. 8591eb62cbbSBarry Smith 8601eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 8611eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 862d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 8631eb62cbbSBarry Smith */ 8645615d1e5SSatish Balay #undef __FUNC__ 8655615d1e5SSatish Balay #define __FUNC__ "MatRelax_MPIAIJ" 8668f6be9afSLois Curfman McInnes int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 867dbb450caSBarry Smith double fshift,int its,Vec xx) 8688a729477SBarry Smith { 86944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 870d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 871ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 872c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 8736abc6512SBarry Smith int ierr,*idx, *diag; 874416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 8758a729477SBarry Smith 8763a40ed3dSBarry Smith PetscFunctionBegin; 87783e2fdc7SBarry Smith if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 878d6dfbf8fSBarry Smith diag = A->diag; 879c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 880da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 881f830108cSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 8823a40ed3dSBarry Smith PetscFunctionReturn(0); 883da3a660dSBarry Smith } 8843a40ed3dSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 8853a40ed3dSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 886184914b5SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 887184914b5SBarry Smith if (xx != bb) { 888184914b5SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 889184914b5SBarry Smith } else { 890184914b5SBarry Smith b = x; 891184914b5SBarry Smith } 892184914b5SBarry Smith ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 893184914b5SBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 894184914b5SBarry Smith ls = ls + shift; 895d6dfbf8fSBarry Smith while (its--) { 896d6dfbf8fSBarry Smith /* go down through the rows */ 897d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 898d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 8994d197716SBarry Smith PLogFlops(4*n+3); 900dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 901dbb450caSBarry Smith v = A->a + A->i[i] + shift; 902d6dfbf8fSBarry Smith sum = b[i]; 903d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 904dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 905d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 906dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 907dbb450caSBarry Smith v = B->a + B->i[i] + shift; 908d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 90955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 910d6dfbf8fSBarry Smith } 911d6dfbf8fSBarry Smith /* come up through the rows */ 912d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 913d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 9144d197716SBarry Smith PLogFlops(4*n+3) 915dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 916dbb450caSBarry Smith v = A->a + A->i[i] + shift; 917d6dfbf8fSBarry Smith sum = b[i]; 918d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 919dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 920d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 921dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 922dbb450caSBarry Smith v = B->a + B->i[i] + shift; 923d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 92455a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 925d6dfbf8fSBarry Smith } 926d6dfbf8fSBarry Smith } 927184914b5SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 928184914b5SBarry Smith if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 929184914b5SBarry Smith ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 9303a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 931da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 932f830108cSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 9333a40ed3dSBarry Smith PetscFunctionReturn(0); 934da3a660dSBarry Smith } 9353a40ed3dSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 9363a40ed3dSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 937184914b5SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 938184914b5SBarry Smith if (xx != bb) { 939184914b5SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 940184914b5SBarry Smith } else { 941184914b5SBarry Smith b = x; 942184914b5SBarry Smith } 943184914b5SBarry Smith ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 944184914b5SBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 945184914b5SBarry Smith ls = ls + shift; 946d6dfbf8fSBarry Smith while (its--) { 947d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 948d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 9494d197716SBarry Smith PLogFlops(4*n+3); 950dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 951dbb450caSBarry Smith v = A->a + A->i[i] + shift; 952d6dfbf8fSBarry Smith sum = b[i]; 953d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 954dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 955d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 956dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 957dbb450caSBarry Smith v = B->a + B->i[i] + shift; 958d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 95955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 960d6dfbf8fSBarry Smith } 961d6dfbf8fSBarry Smith } 962184914b5SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 963184914b5SBarry Smith if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 964184914b5SBarry Smith ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 9653a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 966da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 967f830108cSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 9683a40ed3dSBarry Smith PetscFunctionReturn(0); 969da3a660dSBarry Smith } 9702e8a6d31SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 9712e8a6d31SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 972184914b5SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 973184914b5SBarry Smith if (xx != bb) { 974184914b5SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 975184914b5SBarry Smith } else { 976184914b5SBarry Smith b = x; 977184914b5SBarry Smith } 978184914b5SBarry Smith ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 979184914b5SBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 980184914b5SBarry Smith ls = ls + shift; 981d6dfbf8fSBarry Smith while (its--) { 982d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 983d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 9844d197716SBarry Smith PLogFlops(4*n+3); 985dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 986dbb450caSBarry Smith v = A->a + A->i[i] + shift; 987d6dfbf8fSBarry Smith sum = b[i]; 988d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 989dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 990d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 991dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 992dbb450caSBarry Smith v = B->a + B->i[i] + shift; 993d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 99455a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 995d6dfbf8fSBarry Smith } 996d6dfbf8fSBarry Smith } 997184914b5SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 998184914b5SBarry Smith if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 999184914b5SBarry Smith ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 10003a40ed3dSBarry Smith } else { 1001a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1002c16cb8f2SBarry Smith } 10033a40ed3dSBarry Smith PetscFunctionReturn(0); 10048a729477SBarry Smith } 1005a66be287SLois Curfman McInnes 10065615d1e5SSatish Balay #undef __FUNC__ 1007d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_MPIAIJ" 10088f6be9afSLois Curfman McInnes int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1009a66be287SLois Curfman McInnes { 1010a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1011a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 10124e220ebcSLois Curfman McInnes int ierr; 10134e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 1014a66be287SLois Curfman McInnes 10153a40ed3dSBarry Smith PetscFunctionBegin; 10164e220ebcSLois Curfman McInnes info->block_size = 1.0; 10174e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 10184e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 10194e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 10204e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 10214e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 10224e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 1023a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 10244e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 10254e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 10264e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 10274e220ebcSLois Curfman McInnes info->memory = isend[3]; 10284e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1029a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1030ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 10314e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10324e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10334e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10344e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10354e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1036a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1037ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 10384e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10394e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10404e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10414e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10424e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1043a66be287SLois Curfman McInnes } 10444e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 10454e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 10464e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 10478d700155SBarry Smith info->rows_global = (double)mat->M; 10488d700155SBarry Smith info->columns_global = (double)mat->N; 10498d700155SBarry Smith info->rows_local = (double)mat->m; 10508d700155SBarry Smith info->columns_local = (double)mat->N; 10514e220ebcSLois Curfman McInnes 10523a40ed3dSBarry Smith PetscFunctionReturn(0); 1053a66be287SLois Curfman McInnes } 1054a66be287SLois Curfman McInnes 10555615d1e5SSatish Balay #undef __FUNC__ 1056d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_MPIAIJ" 10578f6be9afSLois Curfman McInnes int MatSetOption_MPIAIJ(Mat A,MatOption op) 1058c74985f6SBarry Smith { 1059c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 10601dee9f54SBarry Smith int ierr; 1061c74985f6SBarry Smith 10623a40ed3dSBarry Smith PetscFunctionBegin; 10636d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10646d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10656da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1066c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 10674787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 10684787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 10691dee9f54SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 10701dee9f54SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1071b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1072aeafbbfcSLois Curfman McInnes a->roworiented = 1; 10731dee9f54SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 10741dee9f54SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1075b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 10766da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 10776d4a8577SBarry Smith op == MAT_SYMMETRIC || 10786d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 10796d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 1080981c4779SBarry Smith PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 10816d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 10824b0e389bSBarry Smith a->roworiented = 0; 10831dee9f54SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 10841dee9f54SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 10852b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 108690f02eecSBarry Smith a->donotstash = 1; 10873a40ed3dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS){ 10883a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 10893a40ed3dSBarry Smith } else { 10903a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 10913a40ed3dSBarry Smith } 10923a40ed3dSBarry Smith PetscFunctionReturn(0); 1093c74985f6SBarry Smith } 1094c74985f6SBarry Smith 10955615d1e5SSatish Balay #undef __FUNC__ 1096d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_MPIAIJ" 10978f6be9afSLois Curfman McInnes int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1098c74985f6SBarry Smith { 109944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 11003a40ed3dSBarry Smith 11013a40ed3dSBarry Smith PetscFunctionBegin; 11020752156aSBarry Smith if (m) *m = mat->M; 11030752156aSBarry Smith if (n) *n = mat->N; 11043a40ed3dSBarry Smith PetscFunctionReturn(0); 1105c74985f6SBarry Smith } 1106c74985f6SBarry Smith 11075615d1e5SSatish Balay #undef __FUNC__ 1108d4bb536fSBarry Smith #define __FUNC__ "MatGetLocalSize_MPIAIJ" 11098f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1110c74985f6SBarry Smith { 111144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 11123a40ed3dSBarry Smith 11133a40ed3dSBarry Smith PetscFunctionBegin; 11140752156aSBarry Smith if (m) *m = mat->m; 1115f830108cSBarry Smith if (n) *n = mat->n; 11163a40ed3dSBarry Smith PetscFunctionReturn(0); 1117c74985f6SBarry Smith } 1118c74985f6SBarry Smith 11195615d1e5SSatish Balay #undef __FUNC__ 1120d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 11218f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1122c74985f6SBarry Smith { 112344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 11243a40ed3dSBarry Smith 11253a40ed3dSBarry Smith PetscFunctionBegin; 1126c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 11273a40ed3dSBarry Smith PetscFunctionReturn(0); 1128c74985f6SBarry Smith } 1129c74985f6SBarry Smith 11305615d1e5SSatish Balay #undef __FUNC__ 11315615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIAIJ" 11326d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 113339e00950SLois Curfman McInnes { 1134154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 113570f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1136154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1137154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 113870f0671dSBarry Smith int *cmap, *idx_p; 113939e00950SLois Curfman McInnes 11403a40ed3dSBarry Smith PetscFunctionBegin; 1141a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 11427a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 11437a0afa10SBarry Smith 114470f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 11457a0afa10SBarry Smith /* 11467a0afa10SBarry Smith allocate enough space to hold information from the longest row. 11477a0afa10SBarry Smith */ 11487a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1149c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 1150c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 11517a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 11527a0afa10SBarry Smith if (max < tmp) { max = tmp; } 11537a0afa10SBarry Smith } 11543f97c4b0SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 11557a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 11567a0afa10SBarry Smith } 11577a0afa10SBarry Smith 1158a8c6a408SBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1159abc0e9e4SLois Curfman McInnes lrow = row - rstart; 116039e00950SLois Curfman McInnes 1161154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1162154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1163154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 1164f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1165f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1166154123eaSLois Curfman McInnes nztot = nzA + nzB; 1167154123eaSLois Curfman McInnes 116870f0671dSBarry Smith cmap = mat->garray; 1169154123eaSLois Curfman McInnes if (v || idx) { 1170154123eaSLois Curfman McInnes if (nztot) { 1171154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 117270f0671dSBarry Smith int imark = -1; 1173154123eaSLois Curfman McInnes if (v) { 117470f0671dSBarry Smith *v = v_p = mat->rowvalues; 117539e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 117670f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1177154123eaSLois Curfman McInnes else break; 1178154123eaSLois Curfman McInnes } 1179154123eaSLois Curfman McInnes imark = i; 118070f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 118170f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1182154123eaSLois Curfman McInnes } 1183154123eaSLois Curfman McInnes if (idx) { 118470f0671dSBarry Smith *idx = idx_p = mat->rowindices; 118570f0671dSBarry Smith if (imark > -1) { 118670f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 118770f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 118870f0671dSBarry Smith } 118970f0671dSBarry Smith } else { 1190154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 119170f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1192154123eaSLois Curfman McInnes else break; 1193154123eaSLois Curfman McInnes } 1194154123eaSLois Curfman McInnes imark = i; 119570f0671dSBarry Smith } 119670f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 119770f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 119839e00950SLois Curfman McInnes } 11993f97c4b0SBarry Smith } else { 12001ca473b0SSatish Balay if (idx) *idx = 0; 12011ca473b0SSatish Balay if (v) *v = 0; 12021ca473b0SSatish Balay } 1203154123eaSLois Curfman McInnes } 120439e00950SLois Curfman McInnes *nz = nztot; 1205f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1206f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 12073a40ed3dSBarry Smith PetscFunctionReturn(0); 120839e00950SLois Curfman McInnes } 120939e00950SLois Curfman McInnes 12105615d1e5SSatish Balay #undef __FUNC__ 1211d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_MPIAIJ" 12126d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 121339e00950SLois Curfman McInnes { 12147a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 12153a40ed3dSBarry Smith 12163a40ed3dSBarry Smith PetscFunctionBegin; 12177a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1218a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 12197a0afa10SBarry Smith } 12207a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 12213a40ed3dSBarry Smith PetscFunctionReturn(0); 122239e00950SLois Curfman McInnes } 122339e00950SLois Curfman McInnes 12245615d1e5SSatish Balay #undef __FUNC__ 12255615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIAIJ" 12268f6be9afSLois Curfman McInnes int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1227855ac2c5SLois Curfman McInnes { 1228855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1229ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1230416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1231416022c9SBarry Smith double sum = 0.0; 123204ca555eSLois Curfman McInnes Scalar *v; 123304ca555eSLois Curfman McInnes 12343a40ed3dSBarry Smith PetscFunctionBegin; 123517699dbbSLois Curfman McInnes if (aij->size == 1) { 123614183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 123737fa93a5SLois Curfman McInnes } else { 123804ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 123904ca555eSLois Curfman McInnes v = amat->a; 124004ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 1241aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 1242e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 124304ca555eSLois Curfman McInnes #else 124404ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 124504ca555eSLois Curfman McInnes #endif 124604ca555eSLois Curfman McInnes } 124704ca555eSLois Curfman McInnes v = bmat->a; 124804ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 1249aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 1250e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 125104ca555eSLois Curfman McInnes #else 125204ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 125304ca555eSLois Curfman McInnes #endif 125404ca555eSLois Curfman McInnes } 1255ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 125604ca555eSLois Curfman McInnes *norm = sqrt(*norm); 12573a40ed3dSBarry Smith } else if (type == NORM_1) { /* max column norm */ 125804ca555eSLois Curfman McInnes double *tmp, *tmp2; 125904ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 1260758f045eSSatish Balay tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp); 1261758f045eSSatish Balay tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2); 1262549d3d68SSatish Balay ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr); 126304ca555eSLois Curfman McInnes *norm = 0.0; 126404ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 126504ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1266579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 126704ca555eSLois Curfman McInnes } 126804ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 126904ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1270579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 127104ca555eSLois Curfman McInnes } 1272ca161407SBarry Smith ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 127304ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 127404ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 127504ca555eSLois Curfman McInnes } 1276606d414cSSatish Balay ierr = PetscFree(tmp);CHKERRQ(ierr); 1277606d414cSSatish Balay ierr = PetscFree(tmp2);CHKERRQ(ierr); 12783a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 1279515d9167SLois Curfman McInnes double ntemp = 0.0; 128004ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1281dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 128204ca555eSLois Curfman McInnes sum = 0.0; 128304ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1284cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 128504ca555eSLois Curfman McInnes } 1286dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 128704ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1288cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 128904ca555eSLois Curfman McInnes } 1290515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 129104ca555eSLois Curfman McInnes } 1292ca161407SBarry Smith ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1293ca161407SBarry Smith } else { 1294a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 129504ca555eSLois Curfman McInnes } 129637fa93a5SLois Curfman McInnes } 12973a40ed3dSBarry Smith PetscFunctionReturn(0); 1298855ac2c5SLois Curfman McInnes } 1299855ac2c5SLois Curfman McInnes 13005615d1e5SSatish Balay #undef __FUNC__ 13015615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIAIJ" 13028f6be9afSLois Curfman McInnes int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1303b7c46309SBarry Smith { 1304b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1305dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1306416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1307b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 13083a40ed3dSBarry Smith Mat B; 1309b7c46309SBarry Smith Scalar *array; 1310b7c46309SBarry Smith 13113a40ed3dSBarry Smith PetscFunctionBegin; 1312d4bb536fSBarry Smith if (matout == PETSC_NULL && M != N) { 1313a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1314d4bb536fSBarry Smith } 1315d4bb536fSBarry Smith 1316d4bb536fSBarry Smith ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1317b7c46309SBarry Smith 1318b7c46309SBarry Smith /* copy over the A part */ 1319ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1320b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1321b7c46309SBarry Smith row = a->rstart; 1322dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1323b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1324416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1325b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1326b7c46309SBarry Smith } 1327b7c46309SBarry Smith aj = Aloc->j; 13284af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1329b7c46309SBarry Smith 1330b7c46309SBarry Smith /* copy over the B part */ 1331ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1332b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1333b7c46309SBarry Smith row = a->rstart; 13340452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols); 1335dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1336b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1337416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1338b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1339b7c46309SBarry Smith } 1340606d414cSSatish Balay ierr = PetscFree(ct);CHKERRQ(ierr); 13416d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 13426d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 13433638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 13440de55854SLois Curfman McInnes *matout = B; 13450de55854SLois Curfman McInnes } else { 1346f830108cSBarry Smith PetscOps *Abops; 1347f830108cSBarry Smith struct _MatOps *Aops; 1348f830108cSBarry Smith 13490de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 1350606d414cSSatish Balay ierr = PetscFree(a->rowners);CHKERRQ(ierr); 13510de55854SLois Curfman McInnes ierr = MatDestroy(a->A);CHKERRQ(ierr); 13520de55854SLois Curfman McInnes ierr = MatDestroy(a->B);CHKERRQ(ierr); 1353aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 13540f5bd95cSBarry Smith if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);} 1355b1fc9764SSatish Balay #else 1356606d414cSSatish Balay if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1357b1fc9764SSatish Balay #endif 1358606d414cSSatish Balay if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 13590de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1360a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1361606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 1362f830108cSBarry Smith 1363f830108cSBarry Smith /* 1364f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1365f830108cSBarry Smith A pointers for the bops and ops but copy everything 1366f830108cSBarry Smith else from C. 1367f830108cSBarry Smith */ 1368f830108cSBarry Smith Abops = A->bops; 1369f830108cSBarry Smith Aops = A->ops; 1370549d3d68SSatish Balay ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1371f830108cSBarry Smith A->bops = Abops; 1372f830108cSBarry Smith A->ops = Aops; 13730452661fSBarry Smith PetscHeaderDestroy(B); 13740de55854SLois Curfman McInnes } 13753a40ed3dSBarry Smith PetscFunctionReturn(0); 1376b7c46309SBarry Smith } 1377b7c46309SBarry Smith 13785615d1e5SSatish Balay #undef __FUNC__ 13795615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIAIJ" 13804b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1381a008b906SSatish Balay { 13824b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13834b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1384a008b906SSatish Balay int ierr,s1,s2,s3; 1385a008b906SSatish Balay 13863a40ed3dSBarry Smith PetscFunctionBegin; 13874b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 13884b967eb1SSatish Balay if (rr) { 1389e1311b90SBarry Smith ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1390a8c6a408SBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 13914b967eb1SSatish Balay /* Overlap communication with computation. */ 139243a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1393a008b906SSatish Balay } 13944b967eb1SSatish Balay if (ll) { 1395e1311b90SBarry Smith ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1396a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1397f830108cSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 13984b967eb1SSatish Balay } 13994b967eb1SSatish Balay /* scale the diagonal block */ 1400f830108cSBarry Smith ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 14014b967eb1SSatish Balay 14024b967eb1SSatish Balay if (rr) { 14034b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 140443a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1405f830108cSBarry Smith ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 14064b967eb1SSatish Balay } 14074b967eb1SSatish Balay 14083a40ed3dSBarry Smith PetscFunctionReturn(0); 1409a008b906SSatish Balay } 1410a008b906SSatish Balay 1411a008b906SSatish Balay 14125615d1e5SSatish Balay #undef __FUNC__ 1413d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_MPIAIJ" 14148f6be9afSLois Curfman McInnes int MatPrintHelp_MPIAIJ(Mat A) 1415682d7d0cSBarry Smith { 1416682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 14173a40ed3dSBarry Smith int ierr; 1418682d7d0cSBarry Smith 14193a40ed3dSBarry Smith PetscFunctionBegin; 14203a40ed3dSBarry Smith if (!a->rank) { 14213a40ed3dSBarry Smith ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 14223a40ed3dSBarry Smith } 14233a40ed3dSBarry Smith PetscFunctionReturn(0); 1424682d7d0cSBarry Smith } 1425682d7d0cSBarry Smith 14265615d1e5SSatish Balay #undef __FUNC__ 1427d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_MPIAIJ" 14288f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 14295a838052SSatish Balay { 14303a40ed3dSBarry Smith PetscFunctionBegin; 14315a838052SSatish Balay *bs = 1; 14323a40ed3dSBarry Smith PetscFunctionReturn(0); 14335a838052SSatish Balay } 14345615d1e5SSatish Balay #undef __FUNC__ 1435d4bb536fSBarry Smith #define __FUNC__ "MatSetUnfactored_MPIAIJ" 14368f6be9afSLois Curfman McInnes int MatSetUnfactored_MPIAIJ(Mat A) 1437bb5a7306SBarry Smith { 1438bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1439bb5a7306SBarry Smith int ierr; 14403a40ed3dSBarry Smith 14413a40ed3dSBarry Smith PetscFunctionBegin; 1442bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 14433a40ed3dSBarry Smith PetscFunctionReturn(0); 1444bb5a7306SBarry Smith } 1445bb5a7306SBarry Smith 1446d4bb536fSBarry Smith #undef __FUNC__ 1447d4bb536fSBarry Smith #define __FUNC__ "MatEqual_MPIAIJ" 1448d4bb536fSBarry Smith int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1449d4bb536fSBarry Smith { 1450d4bb536fSBarry Smith Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1451d4bb536fSBarry Smith Mat a, b, c, d; 1452d4bb536fSBarry Smith PetscTruth flg; 1453d4bb536fSBarry Smith int ierr; 1454d4bb536fSBarry Smith 14553a40ed3dSBarry Smith PetscFunctionBegin; 1456a8c6a408SBarry Smith if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1457d4bb536fSBarry Smith a = matA->A; b = matA->B; 1458d4bb536fSBarry Smith c = matB->A; d = matB->B; 1459d4bb536fSBarry Smith 1460d4bb536fSBarry Smith ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1461d4bb536fSBarry Smith if (flg == PETSC_TRUE) { 1462d4bb536fSBarry Smith ierr = MatEqual(b, d, &flg);CHKERRQ(ierr); 1463d4bb536fSBarry Smith } 1464ca161407SBarry Smith ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 14653a40ed3dSBarry Smith PetscFunctionReturn(0); 1466d4bb536fSBarry Smith } 1467d4bb536fSBarry Smith 1468cb5b572fSBarry Smith #undef __FUNC__ 1469cb5b572fSBarry Smith #define __FUNC__ "MatCopy_MPIAIJ" 1470cb5b572fSBarry Smith int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1471cb5b572fSBarry Smith { 1472cb5b572fSBarry Smith int ierr; 1473cb5b572fSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1474cb5b572fSBarry Smith Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1475cb5b572fSBarry Smith 1476cb5b572fSBarry Smith PetscFunctionBegin; 1477cb5b572fSBarry Smith if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1478cb5b572fSBarry Smith /* because of the column compression in the off-processor part of the matrix a->B, 1479cb5b572fSBarry Smith the number of columns in a->B and b->B may be different, hence we cannot call 1480cb5b572fSBarry Smith the MatCopy() directly on the two parts. If need be, we can provide a more 1481cb5b572fSBarry Smith efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1482cb5b572fSBarry Smith then copying the submatrices */ 1483cb5b572fSBarry Smith ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1484cb5b572fSBarry Smith } else { 1485cb5b572fSBarry Smith ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1486cb5b572fSBarry Smith ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1487cb5b572fSBarry Smith } 1488cb5b572fSBarry Smith PetscFunctionReturn(0); 1489cb5b572fSBarry Smith } 1490cb5b572fSBarry Smith 14912e8a6d31SBarry Smith extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 14922f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 14930a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 14947b2a1423SBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 14957b2a1423SBarry Smith extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 149600e6dbe6SBarry Smith 14978a729477SBarry Smith /* -------------------------------------------------------------------*/ 1498cda55fadSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1499cda55fadSBarry Smith MatGetRow_MPIAIJ, 1500cda55fadSBarry Smith MatRestoreRow_MPIAIJ, 1501cda55fadSBarry Smith MatMult_MPIAIJ, 1502cda55fadSBarry Smith MatMultAdd_MPIAIJ, 1503cda55fadSBarry Smith MatMultTrans_MPIAIJ, 1504cda55fadSBarry Smith MatMultTransAdd_MPIAIJ, 1505cda55fadSBarry Smith 0, 1506cda55fadSBarry Smith 0, 1507cda55fadSBarry Smith 0, 1508cda55fadSBarry Smith 0, 1509cda55fadSBarry Smith 0, 1510cda55fadSBarry Smith 0, 151144a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1512b7c46309SBarry Smith MatTranspose_MPIAIJ, 1513cda55fadSBarry Smith MatGetInfo_MPIAIJ, 1514cda55fadSBarry Smith MatEqual_MPIAIJ, 1515cda55fadSBarry Smith MatGetDiagonal_MPIAIJ, 1516cda55fadSBarry Smith MatDiagonalScale_MPIAIJ, 1517cda55fadSBarry Smith MatNorm_MPIAIJ, 1518cda55fadSBarry Smith MatAssemblyBegin_MPIAIJ, 1519cda55fadSBarry Smith MatAssemblyEnd_MPIAIJ, 15201eb62cbbSBarry Smith 0, 1521cda55fadSBarry Smith MatSetOption_MPIAIJ, 1522cda55fadSBarry Smith MatZeroEntries_MPIAIJ, 1523cda55fadSBarry Smith MatZeroRows_MPIAIJ, 1524cda55fadSBarry Smith 0, 1525cda55fadSBarry Smith 0, 1526cda55fadSBarry Smith 0, 1527cda55fadSBarry Smith 0, 1528cda55fadSBarry Smith MatGetSize_MPIAIJ, 1529cda55fadSBarry Smith MatGetLocalSize_MPIAIJ, 1530cda55fadSBarry Smith MatGetOwnershipRange_MPIAIJ, 1531cda55fadSBarry Smith 0, 1532cda55fadSBarry Smith 0, 1533cda55fadSBarry Smith 0, 1534cda55fadSBarry Smith 0, 15352e8a6d31SBarry Smith MatDuplicate_MPIAIJ, 1536cda55fadSBarry Smith 0, 1537cda55fadSBarry Smith 0, 1538cda55fadSBarry Smith 0, 1539cda55fadSBarry Smith 0, 1540cda55fadSBarry Smith 0, 1541cda55fadSBarry Smith MatGetSubMatrices_MPIAIJ, 1542cda55fadSBarry Smith MatIncreaseOverlap_MPIAIJ, 1543cda55fadSBarry Smith MatGetValues_MPIAIJ, 1544cb5b572fSBarry Smith MatCopy_MPIAIJ, 1545052efed2SBarry Smith MatPrintHelp_MPIAIJ, 1546cda55fadSBarry Smith MatScale_MPIAIJ, 1547cda55fadSBarry Smith 0, 1548cda55fadSBarry Smith 0, 1549cda55fadSBarry Smith 0, 1550cda55fadSBarry Smith MatGetBlockSize_MPIAIJ, 1551cda55fadSBarry Smith 0, 1552cda55fadSBarry Smith 0, 1553cda55fadSBarry Smith 0, 1554cda55fadSBarry Smith 0, 1555cda55fadSBarry Smith MatFDColoringCreate_MPIAIJ, 1556cda55fadSBarry Smith 0, 1557cda55fadSBarry Smith MatSetUnfactored_MPIAIJ, 1558cda55fadSBarry Smith 0, 1559cda55fadSBarry Smith 0, 1560cda55fadSBarry Smith MatGetSubMatrix_MPIAIJ, 1561cda55fadSBarry Smith 0, 1562cda55fadSBarry Smith 0, 1563cda55fadSBarry Smith MatGetMaps_Petsc}; 156436ce4990SBarry Smith 15652e8a6d31SBarry Smith /* ----------------------------------------------------------------------------------------*/ 15662e8a6d31SBarry Smith 1567fb2e594dSBarry Smith EXTERN_C_BEGIN 15682e8a6d31SBarry Smith #undef __FUNC__ 15692e8a6d31SBarry Smith #define __FUNC__ "MatStoreValues_MPIAIJ" 15702e8a6d31SBarry Smith int MatStoreValues_MPIAIJ(Mat mat) 15712e8a6d31SBarry Smith { 15722e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 15732e8a6d31SBarry Smith int ierr; 15742e8a6d31SBarry Smith 15752e8a6d31SBarry Smith PetscFunctionBegin; 15762e8a6d31SBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 15772e8a6d31SBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 15782e8a6d31SBarry Smith PetscFunctionReturn(0); 15792e8a6d31SBarry Smith } 1580fb2e594dSBarry Smith EXTERN_C_END 15812e8a6d31SBarry Smith 1582fb2e594dSBarry Smith EXTERN_C_BEGIN 15832e8a6d31SBarry Smith #undef __FUNC__ 15842e8a6d31SBarry Smith #define __FUNC__ "MatRetrieveValues_MPIAIJ" 15852e8a6d31SBarry Smith int MatRetrieveValues_MPIAIJ(Mat mat) 15862e8a6d31SBarry Smith { 15872e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 15882e8a6d31SBarry Smith int ierr; 15892e8a6d31SBarry Smith 15902e8a6d31SBarry Smith PetscFunctionBegin; 15912e8a6d31SBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 15922e8a6d31SBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 15932e8a6d31SBarry Smith PetscFunctionReturn(0); 15942e8a6d31SBarry Smith } 1595fb2e594dSBarry Smith EXTERN_C_END 15968a729477SBarry Smith 159727508adbSBarry Smith #include "pc.h" 159827508adbSBarry Smith EXTERN_C_BEGIN 15995ef9f2a5SBarry Smith extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 160027508adbSBarry Smith EXTERN_C_END 160127508adbSBarry Smith 16025615d1e5SSatish Balay #undef __FUNC__ 16035615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIAIJ" 16041987afe7SBarry Smith /*@C 1605ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 16063a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 16073a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 16083a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 16093a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 16108a729477SBarry Smith 1611db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1612db81eaa0SLois Curfman McInnes 16138a729477SBarry Smith Input Parameters: 1614db81eaa0SLois Curfman McInnes + comm - MPI communicator 16157d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 161692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 161792e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 16181a3896d6SBarry Smith . n - This value should be the same as the local size used in creating the 16191a3896d6SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 16201a3896d6SBarry Smith calculated if N is given) For square matrices n is almost always m. 162160d380a7SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 162260d380a7SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1623af1d9917SSatish Balay . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1624af1d9917SSatish Balay (same value is used for all local rows) 1625af1d9917SSatish Balay . d_nnz - array containing the number of nonzeros in the various rows of the 1626af1d9917SSatish Balay DIAGONAL portion of the local submatrix (possibly different for each row) 1627af1d9917SSatish Balay or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1628af1d9917SSatish Balay The size of this array is equal to the number of local rows, i.e 'm'. 1629af1d9917SSatish Balay You must leave room for the diagonal entry even if it is zero. 1630af1d9917SSatish Balay . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1631af1d9917SSatish Balay submatrix (same value is used for all local rows). 1632af1d9917SSatish Balay - o_nnz - array containing the number of nonzeros in the various rows of the 1633af1d9917SSatish Balay OFF-DIAGONAL portion of the local submatrix (possibly different for 1634af1d9917SSatish Balay each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1635af1d9917SSatish Balay structure. The size of this array is equal to the number 1636af1d9917SSatish Balay of local rows, i.e 'm'. 16378a729477SBarry Smith 1638ff756334SLois Curfman McInnes Output Parameter: 163944cd7ae7SLois Curfman McInnes . A - the matrix 16408a729477SBarry Smith 1641b259b22eSLois Curfman McInnes Notes: 1642af1d9917SSatish Balay m,n,M,N parameters specify the size of the matrix, and its partitioning across 1643af1d9917SSatish Balay processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1644af1d9917SSatish Balay storage requirements for this matrix. 1645af1d9917SSatish Balay 1646af1d9917SSatish Balay If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1647af1d9917SSatish Balay processor than it must be used on all processors that share the object for 1648af1d9917SSatish Balay that argument. 1649be79a94dSBarry Smith 1650ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1651ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 16520002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 16530002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1654ff756334SLois Curfman McInnes 1655ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1656ff756334SLois Curfman McInnes (possibly both). 1657ff756334SLois Curfman McInnes 1658af1d9917SSatish Balay The parallel matrix is partitioned such that the first m0 rows belong to 1659af1d9917SSatish Balay process 0, the next m1 rows belong to process 1, the next m2 rows belong 1660af1d9917SSatish Balay to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1661af1d9917SSatish Balay 1662af1d9917SSatish Balay The DIAGONAL portion of the local submatrix of a processor can be defined 1663af1d9917SSatish Balay as the submatrix which is obtained by extraction the part corresponding 1664af1d9917SSatish Balay to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1665af1d9917SSatish Balay first row that belongs to the processor, and r2 is the last row belonging 1666af1d9917SSatish Balay to the this processor. This is a square mxm matrix. The remaining portion 1667af1d9917SSatish Balay of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1668af1d9917SSatish Balay 1669af1d9917SSatish Balay If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1670af1d9917SSatish Balay 16715511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 16725511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 16735511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 16745511cfe3SLois Curfman McInnes 16755511cfe3SLois Curfman McInnes Options Database Keys: 1676db81eaa0SLois Curfman McInnes + -mat_aij_no_inode - Do not use inodes 1677db81eaa0SLois Curfman McInnes . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1678db81eaa0SLois Curfman McInnes - -mat_aij_oneindex - Internally use indexing starting at 1 1679db81eaa0SLois Curfman McInnes rather than 0. Note that when calling MatSetValues(), 1680db81eaa0SLois Curfman McInnes the user still MUST index entries starting at 0! 1681494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1682494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 1683494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1684494eafd4SBarry Smith (defaults to using SeqAIJ format on one processor) 1685494eafd4SBarry Smith 16865511cfe3SLois Curfman McInnes 1687af1d9917SSatish Balay Example usage: 1688e0245417SLois Curfman McInnes 1689af1d9917SSatish Balay Consider the following 8x8 matrix with 34 non-zero values, that is 1690af1d9917SSatish Balay assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1691af1d9917SSatish Balay proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1692af1d9917SSatish Balay as follows: 16932191d07cSBarry Smith 1694db81eaa0SLois Curfman McInnes .vb 1695af1d9917SSatish Balay 1 2 0 | 0 3 0 | 0 4 1696af1d9917SSatish Balay Proc0 0 5 6 | 7 0 0 | 8 0 1697af1d9917SSatish Balay 9 0 10 | 11 0 0 | 12 0 1698af1d9917SSatish Balay ------------------------------------- 1699af1d9917SSatish Balay 13 0 14 | 15 16 17 | 0 0 1700af1d9917SSatish Balay Proc1 0 18 0 | 19 20 21 | 0 0 1701af1d9917SSatish Balay 0 0 0 | 22 23 0 | 24 0 1702af1d9917SSatish Balay ------------------------------------- 1703af1d9917SSatish Balay Proc2 25 26 27 | 0 0 28 | 29 0 1704af1d9917SSatish Balay 30 0 0 | 31 32 33 | 0 34 1705db81eaa0SLois Curfman McInnes .ve 1706b810aeb4SBarry Smith 1707af1d9917SSatish Balay This can be represented as a collection of submatrices as: 17085511cfe3SLois Curfman McInnes 1709af1d9917SSatish Balay .vb 1710af1d9917SSatish Balay A B C 1711af1d9917SSatish Balay D E F 1712af1d9917SSatish Balay G H I 1713af1d9917SSatish Balay .ve 1714af1d9917SSatish Balay 1715af1d9917SSatish Balay Where the submatrices A,B,C are owned by proc0, D,E,F are 1716af1d9917SSatish Balay owned by proc1, G,H,I are owned by proc2. 1717af1d9917SSatish Balay 1718af1d9917SSatish Balay The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1719af1d9917SSatish Balay The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1720af1d9917SSatish Balay The 'M','N' parameters are 8,8, and have the same values on all procs. 1721af1d9917SSatish Balay 1722af1d9917SSatish Balay The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1723af1d9917SSatish Balay submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1724af1d9917SSatish Balay corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1725af1d9917SSatish Balay Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1726af1d9917SSatish Balay part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1727af1d9917SSatish Balay matrix, ans [DF] as another SeqAIJ matrix. 1728af1d9917SSatish Balay 1729af1d9917SSatish Balay When d_nz, o_nz parameters are specified, d_nz storage elements are 1730af1d9917SSatish Balay allocated for every row of the local diagonal submatrix, and o_nz 1731af1d9917SSatish Balay storage locations are allocated for every row of the OFF-DIAGONAL submat. 1732af1d9917SSatish Balay One way to choose d_nz and o_nz is to use the max nonzerors per local 1733af1d9917SSatish Balay rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1734af1d9917SSatish Balay In this case, the values of d_nz,o_nz are: 1735af1d9917SSatish Balay .vb 1736af1d9917SSatish Balay proc0 : dnz = 2, o_nz = 2 1737af1d9917SSatish Balay proc1 : dnz = 3, o_nz = 2 1738af1d9917SSatish Balay proc2 : dnz = 1, o_nz = 4 1739af1d9917SSatish Balay .ve 1740af1d9917SSatish Balay We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1741af1d9917SSatish Balay translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1742af1d9917SSatish Balay for proc3. i.e we are using 12+15+10=37 storage locations to store 1743af1d9917SSatish Balay 34 values. 1744af1d9917SSatish Balay 1745af1d9917SSatish Balay When d_nnz, o_nnz parameters are specified, the storage is specified 1746af1d9917SSatish Balay for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1747af1d9917SSatish Balay In the above case the values for d_nnz,o_nnz are: 1748af1d9917SSatish Balay .vb 1749af1d9917SSatish Balay proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1750af1d9917SSatish Balay proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1751af1d9917SSatish Balay proc2: d_nnz = [1,1] and o_nnz = [4,4] 1752af1d9917SSatish Balay .ve 1753af1d9917SSatish Balay Here the space allocated is sum of all the above values i.e 34, and 1754af1d9917SSatish Balay hence pre-allocation is perfect. 17553a511b96SLois Curfman McInnes 1756027ccd11SLois Curfman McInnes Level: intermediate 1757027ccd11SLois Curfman McInnes 1758dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1759ff756334SLois Curfman McInnes 1760fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 17618a729477SBarry Smith @*/ 1762e1311b90SBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 17638a729477SBarry Smith { 176444cd7ae7SLois Curfman McInnes Mat B; 176544cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 1766eac7125bSBarry Smith int ierr, i,size,flag1 = 0, flag2 = 0; 1767416022c9SBarry Smith 17683a40ed3dSBarry Smith PetscFunctionBegin; 17691d55c564SBarry Smith 1770b73539f3SBarry Smith if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 1771b73539f3SBarry Smith if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 17721d55c564SBarry Smith if (d_nnz) { 17731d55c564SBarry Smith for (i=0; i<m; i++) { 1774b73539f3SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 17751d55c564SBarry Smith } 17761d55c564SBarry Smith } 17771d55c564SBarry Smith if (o_nnz) { 17781d55c564SBarry Smith for (i=0; i<m; i++) { 1779b73539f3SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 17801d55c564SBarry Smith } 17811d55c564SBarry Smith } 17821d55c564SBarry Smith 17831dab6e02SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 17847be0e774SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr); 17857be0e774SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 17867be0e774SBarry Smith if (!flag1 && !flag2 && size == 1) { 17873914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 17883914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 17893914022bSBarry Smith ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 17903a40ed3dSBarry Smith PetscFunctionReturn(0); 17913914022bSBarry Smith } 17923914022bSBarry Smith 179344cd7ae7SLois Curfman McInnes *A = 0; 17943f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 179544cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 179644cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b); 1797549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1798549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1799e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIAIJ; 1800e1311b90SBarry Smith B->ops->view = MatView_MPIAIJ; 180144cd7ae7SLois Curfman McInnes B->factor = 0; 180244cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 180390f02eecSBarry Smith B->mapping = 0; 1804d6dfbf8fSBarry Smith 180547794344SBarry Smith B->insertmode = NOT_SET_VALUES; 18069eb4d147SSatish Balay b->size = size; 18071dab6e02SBarry Smith ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 18081eb62cbbSBarry Smith 18093a40ed3dSBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1810a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 18113a40ed3dSBarry Smith } 18121987afe7SBarry Smith 1813eac7125bSBarry Smith ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1814eac7125bSBarry Smith ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 181544cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 181644cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 181744cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 181844cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 18191eb62cbbSBarry Smith 1820c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 1821c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 1822253a16ddSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1823253a16ddSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1824c7fcc2eaSBarry Smith 18251eb62cbbSBarry Smith /* build local table of row and column ownerships */ 182644cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1827f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1828603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 1829ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 183044cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 183144cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 183244cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 18338a729477SBarry Smith } 183444cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 183544cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 1836ca161407SBarry Smith ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 183744cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 183844cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 183944cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 18408a729477SBarry Smith } 184144cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 184244cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 18438a729477SBarry Smith 18445ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1845029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 184644cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 18477b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1848029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 184944cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 18508a729477SBarry Smith 18511eb62cbbSBarry Smith /* build cache for off array entries formed */ 18528798bf22SSatish Balay ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 185390f02eecSBarry Smith b->donotstash = 0; 185444cd7ae7SLois Curfman McInnes b->colmap = 0; 185544cd7ae7SLois Curfman McInnes b->garray = 0; 185644cd7ae7SLois Curfman McInnes b->roworiented = 1; 18578a729477SBarry Smith 18581eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 185944cd7ae7SLois Curfman McInnes b->lvec = 0; 186044cd7ae7SLois Curfman McInnes b->Mvctx = 0; 18618a729477SBarry Smith 18627a0afa10SBarry Smith /* stuff for MatGetRow() */ 186344cd7ae7SLois Curfman McInnes b->rowindices = 0; 186444cd7ae7SLois Curfman McInnes b->rowvalues = 0; 186544cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 18667a0afa10SBarry Smith 18672e8a6d31SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 18682e8a6d31SBarry Smith "MatStoreValues_MPIAIJ", 18692e8a6d31SBarry Smith (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 18702e8a6d31SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 18712e8a6d31SBarry Smith "MatRetrieveValues_MPIAIJ", 18722e8a6d31SBarry Smith (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 18735ef9f2a5SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 18745ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIAIJ", 18755ef9f2a5SBarry Smith (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 187644cd7ae7SLois Curfman McInnes *A = B; 18773a40ed3dSBarry Smith PetscFunctionReturn(0); 1878d6dfbf8fSBarry Smith } 1879c74985f6SBarry Smith 18805615d1e5SSatish Balay #undef __FUNC__ 18812e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIAIJ" 18822e8a6d31SBarry Smith int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1883d6dfbf8fSBarry Smith { 1884d6dfbf8fSBarry Smith Mat mat; 1885416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1886a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1887d6dfbf8fSBarry Smith 18883a40ed3dSBarry Smith PetscFunctionBegin; 1889416022c9SBarry Smith *newmat = 0; 18903f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1891a5a9c739SBarry Smith PLogObjectCreate(mat); 18920452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a); 1893549d3d68SSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1894e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIAIJ; 1895e1311b90SBarry Smith mat->ops->view = MatView_MPIAIJ; 1896d6dfbf8fSBarry Smith mat->factor = matin->factor; 1897c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1898e7641de0SSatish Balay mat->insertmode = NOT_SET_VALUES; 1899d6dfbf8fSBarry Smith 190044cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 190144cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 190244cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 190344cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1904d6dfbf8fSBarry Smith 1905416022c9SBarry Smith a->rstart = oldmat->rstart; 1906416022c9SBarry Smith a->rend = oldmat->rend; 1907416022c9SBarry Smith a->cstart = oldmat->cstart; 1908416022c9SBarry Smith a->cend = oldmat->cend; 190917699dbbSLois Curfman McInnes a->size = oldmat->size; 191017699dbbSLois Curfman McInnes a->rank = oldmat->rank; 1911e7641de0SSatish Balay a->donotstash = oldmat->donotstash; 1912e7641de0SSatish Balay a->roworiented = oldmat->roworiented; 1913e7641de0SSatish Balay a->rowindices = 0; 1914bcd2baecSBarry Smith a->rowvalues = 0; 1915bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1916d6dfbf8fSBarry Smith 1917603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1918f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1919603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1920549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 19218798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 19222ee70a88SLois Curfman McInnes if (oldmat->colmap) { 1923aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 19240f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1925b1fc9764SSatish Balay #else 19260452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1927416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1928549d3d68SSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr); 1929b1fc9764SSatish Balay #endif 1930416022c9SBarry Smith } else a->colmap = 0; 19313f41c07dSBarry Smith if (oldmat->garray) { 19323f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 19333f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1934464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1935549d3d68SSatish Balay if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1936416022c9SBarry Smith } else a->garray = 0; 1937d6dfbf8fSBarry Smith 1938416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1939416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1940a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1941416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 19422e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1943416022c9SBarry Smith PLogObjectParent(mat,a->A); 19442e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1945416022c9SBarry Smith PLogObjectParent(mat,a->B); 19465dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 194725cdf11fSBarry Smith if (flg) { 1948682d7d0cSBarry Smith ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1949682d7d0cSBarry Smith } 195027508adbSBarry Smith ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 19518a729477SBarry Smith *newmat = mat; 19523a40ed3dSBarry Smith PetscFunctionReturn(0); 19538a729477SBarry Smith } 1954416022c9SBarry Smith 195577c4ece6SBarry Smith #include "sys.h" 1956416022c9SBarry Smith 19575615d1e5SSatish Balay #undef __FUNC__ 19585615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIAIJ" 195919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1960416022c9SBarry Smith { 1961d65a2f8fSBarry Smith Mat A; 1962d65a2f8fSBarry Smith Scalar *vals,*svals; 196319bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1964416022c9SBarry Smith MPI_Status status; 19653a40ed3dSBarry Smith int i, nz, ierr, j,rstart, rend, fd; 196617699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1967d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1968b362ba68SBarry Smith int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1969416022c9SBarry Smith 19703a40ed3dSBarry Smith PetscFunctionBegin; 19711dab6e02SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 19721dab6e02SBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197317699dbbSLois Curfman McInnes if (!rank) { 197490ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 19750752156aSBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1976a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1977d64ed03dSBarry Smith if (header[3] < 0) { 1978a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1979d64ed03dSBarry Smith } 19806c5fab8fSBarry Smith } 19816c5fab8fSBarry Smith 1982ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1983416022c9SBarry Smith M = header[1]; N = header[2]; 1984416022c9SBarry Smith /* determine ownership of all rows */ 198517699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 19860452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1987ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1988416022c9SBarry Smith rowners[0] = 0; 198917699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1990416022c9SBarry Smith rowners[i] += rowners[i-1]; 1991416022c9SBarry Smith } 199217699dbbSLois Curfman McInnes rstart = rowners[rank]; 199317699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1994416022c9SBarry Smith 1995416022c9SBarry Smith /* distribute row lengths to all processors */ 19960452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens); 1997416022c9SBarry Smith offlens = ourlens + (rend-rstart); 199817699dbbSLois Curfman McInnes if (!rank) { 19990452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths); 20000752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 20010452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 200217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2003ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2004606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 20053a40ed3dSBarry Smith } else { 2006ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2007416022c9SBarry Smith } 2008416022c9SBarry Smith 200917699dbbSLois Curfman McInnes if (!rank) { 2010416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 20110452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2012549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 201317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2014416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2015416022c9SBarry Smith procsnz[i] += rowlengths[j]; 2016416022c9SBarry Smith } 2017416022c9SBarry Smith } 2018606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2019416022c9SBarry Smith 2020416022c9SBarry Smith /* determine max buffer needed and allocate it */ 2021416022c9SBarry Smith maxnz = 0; 202217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 20230452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 2024416022c9SBarry Smith } 20250452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2026416022c9SBarry Smith 2027416022c9SBarry Smith /* read in my part of the matrix column indices */ 2028416022c9SBarry Smith nz = procsnz[0]; 20290452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 20300752156aSBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2031d65a2f8fSBarry Smith 2032d65a2f8fSBarry Smith /* read in every one elses and ship off */ 203317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 2034d65a2f8fSBarry Smith nz = procsnz[i]; 20350752156aSBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2036ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2037d65a2f8fSBarry Smith } 2038606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 20393a40ed3dSBarry Smith } else { 2040416022c9SBarry Smith /* determine buffer space needed for message */ 2041416022c9SBarry Smith nz = 0; 2042416022c9SBarry Smith for ( i=0; i<m; i++ ) { 2043416022c9SBarry Smith nz += ourlens[i]; 2044416022c9SBarry Smith } 20450452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 2046416022c9SBarry Smith 2047416022c9SBarry Smith /* receive message of column indices*/ 2048ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2049ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2050a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2051416022c9SBarry Smith } 2052416022c9SBarry Smith 2053b362ba68SBarry Smith /* determine column ownership if matrix is not square */ 2054b362ba68SBarry Smith if (N != M) { 2055b362ba68SBarry Smith n = N/size + ((N % size) > rank); 2056b362ba68SBarry Smith ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2057b362ba68SBarry Smith cstart = cend - n; 2058b362ba68SBarry Smith } else { 2059b362ba68SBarry Smith cstart = rstart; 2060b362ba68SBarry Smith cend = rend; 2061fb2e594dSBarry Smith n = cend - cstart; 2062b362ba68SBarry Smith } 2063b362ba68SBarry Smith 2064416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 2065549d3d68SSatish Balay ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2066416022c9SBarry Smith jj = 0; 2067416022c9SBarry Smith for ( i=0; i<m; i++ ) { 2068416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 2069b362ba68SBarry Smith if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2070416022c9SBarry Smith jj++; 2071416022c9SBarry Smith } 2072416022c9SBarry Smith } 2073d65a2f8fSBarry Smith 2074d65a2f8fSBarry Smith /* create our matrix */ 2075416022c9SBarry Smith for ( i=0; i<m; i++ ) { 2076416022c9SBarry Smith ourlens[i] -= offlens[i]; 2077416022c9SBarry Smith } 2078b362ba68SBarry Smith ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2079d65a2f8fSBarry Smith A = *newmat; 2080fb2e594dSBarry Smith ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2081d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 2082d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 2083d65a2f8fSBarry Smith } 2084416022c9SBarry Smith 208517699dbbSLois Curfman McInnes if (!rank) { 20860452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals); 2087416022c9SBarry Smith 2088416022c9SBarry Smith /* read in my part of the matrix numerical values */ 2089416022c9SBarry Smith nz = procsnz[0]; 20900752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2091d65a2f8fSBarry Smith 2092d65a2f8fSBarry Smith /* insert into matrix */ 2093d65a2f8fSBarry Smith jj = rstart; 2094d65a2f8fSBarry Smith smycols = mycols; 2095d65a2f8fSBarry Smith svals = vals; 2096d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 2097d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2098d65a2f8fSBarry Smith smycols += ourlens[i]; 2099d65a2f8fSBarry Smith svals += ourlens[i]; 2100d65a2f8fSBarry Smith jj++; 2101416022c9SBarry Smith } 2102416022c9SBarry Smith 2103d65a2f8fSBarry Smith /* read in other processors and ship out */ 210417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 2105416022c9SBarry Smith nz = procsnz[i]; 21060752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2107ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2108416022c9SBarry Smith } 2109606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 21103a40ed3dSBarry Smith } else { 2111d65a2f8fSBarry Smith /* receive numeric values */ 21120452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals); 2113416022c9SBarry Smith 2114d65a2f8fSBarry Smith /* receive message of values*/ 2115ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2116ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2117a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2118d65a2f8fSBarry Smith 2119d65a2f8fSBarry Smith /* insert into matrix */ 2120d65a2f8fSBarry Smith jj = rstart; 2121d65a2f8fSBarry Smith smycols = mycols; 2122d65a2f8fSBarry Smith svals = vals; 2123d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 2124d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2125d65a2f8fSBarry Smith smycols += ourlens[i]; 2126d65a2f8fSBarry Smith svals += ourlens[i]; 2127d65a2f8fSBarry Smith jj++; 2128d65a2f8fSBarry Smith } 2129d65a2f8fSBarry Smith } 2130606d414cSSatish Balay ierr = PetscFree(ourlens);CHKERRQ(ierr); 2131606d414cSSatish Balay ierr = PetscFree(vals);CHKERRQ(ierr); 2132606d414cSSatish Balay ierr = PetscFree(mycols);CHKERRQ(ierr); 2133606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2134d65a2f8fSBarry Smith 21356d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 21366d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 21373a40ed3dSBarry Smith PetscFunctionReturn(0); 2138416022c9SBarry Smith } 2139a0ff6018SBarry Smith 214029da9460SBarry Smith #undef __FUNC__ 214129da9460SBarry Smith #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2142a0ff6018SBarry Smith /* 214329da9460SBarry Smith Not great since it makes two copies of the submatrix, first an SeqAIJ 214429da9460SBarry Smith in local and then by concatenating the local matrices the end result. 214529da9460SBarry Smith Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2146a0ff6018SBarry Smith */ 21477b2a1423SBarry Smith int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2148a0ff6018SBarry Smith { 214900e6dbe6SBarry Smith int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2150fee21e36SBarry Smith Mat *local,M, Mreuse; 215100e6dbe6SBarry Smith Scalar *vwork,*aa; 215200e6dbe6SBarry Smith MPI_Comm comm = mat->comm; 215300e6dbe6SBarry Smith Mat_SeqAIJ *aij; 215400e6dbe6SBarry Smith int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2155a0ff6018SBarry Smith 2156a0ff6018SBarry Smith PetscFunctionBegin; 21571dab6e02SBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 21581dab6e02SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 215900e6dbe6SBarry Smith 2160fee21e36SBarry Smith if (call == MAT_REUSE_MATRIX) { 2161fee21e36SBarry Smith ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2162fee21e36SBarry Smith if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2163fee21e36SBarry Smith local = &Mreuse; 2164fee21e36SBarry Smith ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2165fee21e36SBarry Smith } else { 2166a0ff6018SBarry Smith ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2167fee21e36SBarry Smith Mreuse = *local; 2168606d414cSSatish Balay ierr = PetscFree(local);CHKERRQ(ierr); 2169fee21e36SBarry Smith } 2170a0ff6018SBarry Smith 2171a0ff6018SBarry Smith /* 2172a0ff6018SBarry Smith m - number of local rows 2173a0ff6018SBarry Smith n - number of columns (same on all processors) 2174a0ff6018SBarry Smith rstart - first row in new global matrix generated 2175a0ff6018SBarry Smith */ 2176fee21e36SBarry Smith ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2177a0ff6018SBarry Smith if (call == MAT_INITIAL_MATRIX) { 2178fee21e36SBarry Smith aij = (Mat_SeqAIJ *) (Mreuse)->data; 2179a8c6a408SBarry Smith if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 218000e6dbe6SBarry Smith ii = aij->i; 218100e6dbe6SBarry Smith jj = aij->j; 218200e6dbe6SBarry Smith 2183a0ff6018SBarry Smith /* 218400e6dbe6SBarry Smith Determine the number of non-zeros in the diagonal and off-diagonal 218500e6dbe6SBarry Smith portions of the matrix in order to do correct preallocation 2186a0ff6018SBarry Smith */ 218700e6dbe6SBarry Smith 218800e6dbe6SBarry Smith /* first get start and end of "diagonal" columns */ 21896a6a5d1dSBarry Smith if (csize == PETSC_DECIDE) { 219000e6dbe6SBarry Smith nlocal = n/size + ((n % size) > rank); 21916a6a5d1dSBarry Smith } else { 21926a6a5d1dSBarry Smith nlocal = csize; 21936a6a5d1dSBarry Smith } 2194ca161407SBarry Smith ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 219500e6dbe6SBarry Smith rstart = rend - nlocal; 21966a6a5d1dSBarry Smith if (rank == size - 1 && rend != n) { 21976a6a5d1dSBarry Smith SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 21986a6a5d1dSBarry Smith } 219900e6dbe6SBarry Smith 220000e6dbe6SBarry Smith /* next, compute all the lengths */ 220100e6dbe6SBarry Smith dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 220200e6dbe6SBarry Smith olens = dlens + m; 220300e6dbe6SBarry Smith for ( i=0; i<m; i++ ) { 220400e6dbe6SBarry Smith jend = ii[i+1] - ii[i]; 220500e6dbe6SBarry Smith olen = 0; 220600e6dbe6SBarry Smith dlen = 0; 220700e6dbe6SBarry Smith for ( j=0; j<jend; j++ ) { 220800e6dbe6SBarry Smith if ( *jj < rstart || *jj >= rend) olen++; 220900e6dbe6SBarry Smith else dlen++; 221000e6dbe6SBarry Smith jj++; 221100e6dbe6SBarry Smith } 221200e6dbe6SBarry Smith olens[i] = olen; 221300e6dbe6SBarry Smith dlens[i] = dlen; 221400e6dbe6SBarry Smith } 22152d207970SBarry Smith ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2216606d414cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2217a0ff6018SBarry Smith } else { 2218a0ff6018SBarry Smith int ml,nl; 2219a0ff6018SBarry Smith 2220a0ff6018SBarry Smith M = *newmat; 2221a0ff6018SBarry Smith ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2222a8c6a408SBarry Smith if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2223a0ff6018SBarry Smith ierr = MatZeroEntries(M);CHKERRQ(ierr); 2224c48de900SBarry Smith /* 2225c48de900SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2226c48de900SBarry Smith rather than the slower MatSetValues(). 2227c48de900SBarry Smith */ 2228c48de900SBarry Smith M->was_assembled = PETSC_TRUE; 2229c48de900SBarry Smith M->assembled = PETSC_FALSE; 2230a0ff6018SBarry Smith } 2231a0ff6018SBarry Smith ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2232fee21e36SBarry Smith aij = (Mat_SeqAIJ *) (Mreuse)->data; 2233a8c6a408SBarry Smith if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 223400e6dbe6SBarry Smith ii = aij->i; 223500e6dbe6SBarry Smith jj = aij->j; 223600e6dbe6SBarry Smith aa = aij->a; 2237a0ff6018SBarry Smith for (i=0; i<m; i++) { 2238a0ff6018SBarry Smith row = rstart + i; 223900e6dbe6SBarry Smith nz = ii[i+1] - ii[i]; 224000e6dbe6SBarry Smith cwork = jj; jj += nz; 224100e6dbe6SBarry Smith vwork = aa; aa += nz; 22428c638d02SBarry Smith ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2243a0ff6018SBarry Smith } 2244a0ff6018SBarry Smith 2245a0ff6018SBarry Smith ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2246a0ff6018SBarry Smith ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2247a0ff6018SBarry Smith *newmat = M; 2248fee21e36SBarry Smith 2249fee21e36SBarry Smith /* save submatrix used in processor for next request */ 2250fee21e36SBarry Smith if (call == MAT_INITIAL_MATRIX) { 2251fee21e36SBarry Smith ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2252fee21e36SBarry Smith ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2253fee21e36SBarry Smith } 2254fee21e36SBarry Smith 2255a0ff6018SBarry Smith PetscFunctionReturn(0); 2256a0ff6018SBarry Smith } 2257