1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mmbaij.c,v 1.20 1999/01/11 17:18:50 balay Exp bsmith $"; 3 #endif 4 5 6 /* 7 Support for the parallel BAIJ matrix vector multiply 8 */ 9 #include "src/mat/impls/baij/mpi/mpibaij.h" 10 #include "src/vec/vecimpl.h" 11 12 #undef __FUNC__ 13 #define __FUNC__ "MatSetUpMultiply_MPIBAIJ" 14 int MatSetUpMultiply_MPIBAIJ(Mat mat) 15 { 16 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 17 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 18 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 19 int col,bs = baij->bs,*tmp,*stmp; 20 IS from,to; 21 Vec gvec; 22 #if defined (USE_CTABLE) 23 Table gid1_lid1; 24 CTablePos tpos; 25 int gid, lid; 26 #endif 27 28 PetscFunctionBegin; 29 30 #if defined (USE_CTABLE) 31 /* use a table - Mark Adams */ 32 TableCreate( &gid1_lid1, B->mbs ); 33 for ( i=0; i<B->mbs; i++ ) { 34 for ( j=0; j<B->ilen[i]; j++ ) { 35 int gid1 = aj[B->i[i] + j] + 1; 36 if ( !TableFind( gid1_lid1, gid1 ) ){ 37 /* one based table */ 38 ierr = TableAdd( gid1_lid1, gid1, ++ec ); CHKERRQ(ierr); 39 } 40 } 41 } 42 /* form array of columns we need */ 43 garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 44 tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp); 45 ierr = TableGetHeadPosition( gid1_lid1, &tpos ); CHKERRQ(ierr); 46 while( tpos ) { 47 ierr = TableGetNext( gid1_lid1, &tpos, &gid, &lid ); CHKERRQ(ierr); 48 gid--; lid--; 49 garray[lid] = gid; 50 } 51 ierr = PetscSortInt(ec,garray); CHKERRQ(ierr); 52 /* qsort( garray, ec, sizeof(int), intcomparcarc ); */ 53 TableRemoveAll( gid1_lid1 ); 54 for ( i=0; i<ec; i++ ) { 55 ierr = TableAdd( gid1_lid1, garray[i] + 1, i + 1 ); CHKERRQ(ierr); 56 } 57 /* compact out the extra columns in B */ 58 for ( i=0; i<B->mbs; i++ ) { 59 for ( j=0; j<B->ilen[i]; j++ ) { 60 int gid1 = aj[B->i[i] + j] + 1; 61 lid = TableFind( gid1_lid1, gid1 ) - 1; 62 aj[B->i[i] + j] = lid; 63 } 64 } 65 B->nbs = ec; 66 B->n = ec*B->bs; 67 TableDelete(gid1_lid1); 68 /* Mark Adams */ 69 #else 70 /* For the first stab we make an array as long as the number of columns */ 71 /* mark those columns that are in baij->B */ 72 indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices); 73 PetscMemzero(indices,Nbs*sizeof(int)); 74 for ( i=0; i<B->mbs; i++ ) { 75 for ( j=0; j<B->ilen[i]; j++ ) { 76 if (!indices[aj[B->i[i] + j]]) ec++; 77 indices[aj[B->i[i] + j] ] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 83 tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp) 84 ec = 0; 85 for ( i=0; i<Nbs; i++ ) { 86 if (indices[i]) { 87 garray[ec++] = i; 88 } 89 } 90 91 /* make indices now point into garray */ 92 for ( i=0; i<ec; i++ ) { 93 indices[garray[i]] = i; 94 } 95 96 /* compact out the extra columns in B */ 97 for ( i=0; i<B->mbs; i++ ) { 98 for ( j=0; j<B->ilen[i]; j++ ) { 99 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100 } 101 } 102 B->nbs = ec; 103 B->n = ec*B->bs; 104 PetscFree(indices); 105 #endif 106 107 for ( i=0,col=0; i<ec; i++ ) { 108 for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 109 } 110 /* create local vector that is used to scatter into */ 111 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr); 112 113 /* create two temporary index sets for building scatter-gather */ 114 115 /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */ 116 for ( i=0,col=0; i<ec; i++ ) { 117 garray[i] = bs*garray[i]; 118 } 119 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 120 for ( i=0,col=0; i<ec; i++ ) { 121 garray[i] = garray[i]/bs; 122 } 123 124 stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp); 125 for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; } 126 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 127 PetscFree(stmp); 128 129 /* create temporary global vector to generate scatter context */ 130 /* this is inefficient, but otherwise we must do either 131 1) save garray until the first actual scatter when the vector is known or 132 2) have another way of generating a scatter context without a vector.*/ 133 ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr); 134 135 /* gnerate the scatter context */ 136 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 137 138 /* 139 Post the receives for the first matrix vector product. We sync-chronize after 140 this on the chance that the user immediately calls MatMult() after assemblying 141 the matrix. 142 */ 143 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 144 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 145 146 PLogObjectParent(mat,baij->Mvctx); 147 PLogObjectParent(mat,baij->lvec); 148 PLogObjectParent(mat,from); 149 PLogObjectParent(mat,to); 150 baij->garray = garray; 151 PLogObjectMemory(mat,(ec+1)*sizeof(int)); 152 ierr = ISDestroy(from); CHKERRQ(ierr); 153 ierr = ISDestroy(to); CHKERRQ(ierr); 154 ierr = VecDestroy(gvec); 155 PetscFree(tmp); 156 PetscFunctionReturn(0); 157 } 158 159 160 /* 161 Takes the local part of an already assembled MPIBAIJ matrix 162 and disassembles it. This is to allow new nonzeros into the matrix 163 that require more communication in the matrix vector multiply. 164 Thus certain data-structures must be rebuilt. 165 166 Kind of slow! But that's what application programmers get when 167 they are sloppy. 168 */ 169 #undef __FUNC__ 170 #define __FUNC__ "DisAssemble_MPIBAIJ" 171 int DisAssemble_MPIBAIJ(Mat A) 172 { 173 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 174 Mat B = baij->B,Bnew; 175 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 176 int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 177 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 178 Scalar *a=Bbaij->a; 179 180 PetscFunctionBegin; 181 /* free stuff related to matrix-vec multiply */ 182 ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */ 183 ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0; 184 ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0; 185 if (baij->colmap) { 186 #if defined (USE_CTABLE) 187 TableDelete(baij->colmap); baij->colmap = 0; 188 #else 189 PetscFree(baij->colmap); baij->colmap = 0; 190 PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 191 #endif 192 } 193 194 /* make sure that B is assembled so we can access its values */ 195 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 196 MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 197 198 /* invent new B and copy stuff over */ 199 nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz); 200 for ( i=0; i<mbs; i++ ) { 201 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 202 } 203 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr); 204 PetscFree(nz); 205 206 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 207 for ( i=0; i<mbs; i++ ) { 208 rvals[0] = bs*i; 209 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 210 for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 211 col = garray[Bbaij->j[j]]*bs; 212 for (k=0; k<bs; k++ ) { 213 ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,B->insertmode);CHKERRQ(ierr); 214 col++; 215 } 216 } 217 } 218 PetscFree(baij->garray); baij->garray = 0; 219 PetscFree(rvals); 220 PLogObjectMemory(A,-ec*sizeof(int)); 221 ierr = MatDestroy(B); CHKERRQ(ierr); 222 PLogObjectParent(A,Bnew); 223 baij->B = Bnew; 224 A->was_assembled = PETSC_FALSE; 225 PetscFunctionReturn(0); 226 } 227 228 229